forked from OSchip/llvm-project
[PowerPC] Remove QPX/A2Q BGQ/BGP CNK support
Per the RFC at http://lists.llvm.org/pipermail/llvm-dev/2020-April/141295.html, no one is making use of QPX/A2Q/BGQ/BGP CNK anymore. This patch removes support for QPX/A2Q in llvm, BGQ/BGP in clang, and CNK in openmp/polly. Reviewed By: hfinkel Differential Revision: https://reviews.llvm.org/D83915
This commit is contained in:
parent
fbe911ee75
commit
adffce7153
|
@ -46,8 +46,6 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
|
|||
HasP8Crypto = true;
|
||||
} else if (Feature == "+direct-move") {
|
||||
HasDirectMove = true;
|
||||
} else if (Feature == "+qpx") {
|
||||
HasQPX = true;
|
||||
} else if (Feature == "+htm") {
|
||||
HasHTM = true;
|
||||
} else if (Feature == "+float128") {
|
||||
|
@ -99,7 +97,7 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts,
|
|||
}
|
||||
|
||||
// ABI options.
|
||||
if (ABI == "elfv1" || ABI == "elfv1-qpx")
|
||||
if (ABI == "elfv1")
|
||||
Builder.defineMacro("_CALL_ELF", "1");
|
||||
if (ABI == "elfv2")
|
||||
Builder.defineMacro("_CALL_ELF", "2");
|
||||
|
@ -159,22 +157,11 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts,
|
|||
Builder.defineMacro("_ARCH_PWR10");
|
||||
if (ArchDefs & ArchDefineA2)
|
||||
Builder.defineMacro("_ARCH_A2");
|
||||
if (ArchDefs & ArchDefineA2q) {
|
||||
Builder.defineMacro("_ARCH_A2Q");
|
||||
Builder.defineMacro("_ARCH_QP");
|
||||
}
|
||||
if (ArchDefs & ArchDefineE500)
|
||||
Builder.defineMacro("__NO_LWSYNC__");
|
||||
if (ArchDefs & ArchDefineFuture)
|
||||
Builder.defineMacro("_ARCH_PWR_FUTURE");
|
||||
|
||||
if (getTriple().getVendor() == llvm::Triple::BGQ) {
|
||||
Builder.defineMacro("__bg__");
|
||||
Builder.defineMacro("__THW_BLUEGENE__");
|
||||
Builder.defineMacro("__bgq__");
|
||||
Builder.defineMacro("__TOS_BGQ__");
|
||||
}
|
||||
|
||||
if (HasAltivec) {
|
||||
Builder.defineMacro("__VEC__", "10206");
|
||||
Builder.defineMacro("__ALTIVEC__");
|
||||
|
@ -277,7 +264,6 @@ bool PPCTargetInfo::initFeatureMap(
|
|||
.Case("ppc64le", true)
|
||||
.Default(false);
|
||||
|
||||
Features["qpx"] = (CPU == "a2q");
|
||||
Features["power9-vector"] = (CPU == "pwr9");
|
||||
Features["crypto"] = llvm::StringSwitch<bool>(CPU)
|
||||
.Case("ppc64le", true)
|
||||
|
@ -373,7 +359,6 @@ bool PPCTargetInfo::hasFeature(StringRef Feature) const {
|
|||
.Case("power8-vector", HasP8Vector)
|
||||
.Case("crypto", HasP8Crypto)
|
||||
.Case("direct-move", HasDirectMove)
|
||||
.Case("qpx", HasQPX)
|
||||
.Case("htm", HasHTM)
|
||||
.Case("bpermd", HasBPERMD)
|
||||
.Case("extdiv", HasExtDiv)
|
||||
|
@ -503,17 +488,17 @@ ArrayRef<TargetInfo::AddlRegName> PPCTargetInfo::getGCCAddlRegNames() const {
|
|||
}
|
||||
|
||||
static constexpr llvm::StringLiteral ValidCPUNames[] = {
|
||||
{"generic"}, {"440"}, {"450"}, {"601"}, {"602"},
|
||||
{"603"}, {"603e"}, {"603ev"}, {"604"}, {"604e"},
|
||||
{"620"}, {"630"}, {"g3"}, {"7400"}, {"g4"},
|
||||
{"7450"}, {"g4+"}, {"750"}, {"8548"}, {"970"},
|
||||
{"g5"}, {"a2"}, {"a2q"}, {"e500"}, {"e500mc"},
|
||||
{"e5500"}, {"power3"}, {"pwr3"}, {"power4"}, {"pwr4"},
|
||||
{"power5"}, {"pwr5"}, {"power5x"}, {"pwr5x"}, {"power6"},
|
||||
{"pwr6"}, {"power6x"}, {"pwr6x"}, {"power7"}, {"pwr7"},
|
||||
{"power8"}, {"pwr8"}, {"power9"}, {"pwr9"}, {"power10"},
|
||||
{"pwr10"}, {"powerpc"}, {"ppc"}, {"powerpc64"}, {"ppc64"},
|
||||
{"powerpc64le"}, {"ppc64le"}, {"future"}};
|
||||
{"generic"}, {"440"}, {"450"}, {"601"}, {"602"},
|
||||
{"603"}, {"603e"}, {"603ev"}, {"604"}, {"604e"},
|
||||
{"620"}, {"630"}, {"g3"}, {"7400"}, {"g4"},
|
||||
{"7450"}, {"g4+"}, {"750"}, {"8548"}, {"970"},
|
||||
{"g5"}, {"a2"}, {"e500"}, {"e500mc"}, {"e5500"},
|
||||
{"power3"}, {"pwr3"}, {"power4"}, {"pwr4"}, {"power5"},
|
||||
{"pwr5"}, {"power5x"}, {"pwr5x"}, {"power6"}, {"pwr6"},
|
||||
{"power6x"}, {"pwr6x"}, {"power7"}, {"pwr7"}, {"power8"},
|
||||
{"pwr8"}, {"power9"}, {"pwr9"}, {"power10"}, {"pwr10"},
|
||||
{"powerpc"}, {"ppc"}, {"powerpc64"}, {"ppc64"}, {"powerpc64le"},
|
||||
{"ppc64le"}, {"future"}};
|
||||
|
||||
bool PPCTargetInfo::isValidCPUName(StringRef Name) const {
|
||||
return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames);
|
||||
|
|
|
@ -46,7 +46,6 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
|
|||
ArchDefinePwr10 = 1 << 14,
|
||||
ArchDefineFuture = 1 << 15,
|
||||
ArchDefineA2 = 1 << 16,
|
||||
ArchDefineA2q = 1 << 17,
|
||||
ArchDefineE500 = 1 << 18
|
||||
} ArchDefineTypes;
|
||||
|
||||
|
@ -63,7 +62,6 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
|
|||
bool HasP8Vector = false;
|
||||
bool HasP8Crypto = false;
|
||||
bool HasDirectMove = false;
|
||||
bool HasQPX = false;
|
||||
bool HasHTM = false;
|
||||
bool HasBPERMD = false;
|
||||
bool HasExtDiv = false;
|
||||
|
@ -118,7 +116,6 @@ public:
|
|||
.Case("970", ArchDefineName | ArchDefinePwr4 | ArchDefinePpcgr |
|
||||
ArchDefinePpcsq)
|
||||
.Case("a2", ArchDefineA2)
|
||||
.Case("a2q", ArchDefineName | ArchDefineA2 | ArchDefineA2q)
|
||||
.Cases("power3", "pwr3", ArchDefinePpcgr)
|
||||
.Cases("power4", "pwr4",
|
||||
ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq)
|
||||
|
|
|
@ -57,7 +57,6 @@ std::string ppc::getPPCTargetCPU(const ArgList &Args) {
|
|||
.Case("970", "970")
|
||||
.Case("G5", "g5")
|
||||
.Case("a2", "a2")
|
||||
.Case("a2q", "a2q")
|
||||
.Case("e500", "e500")
|
||||
.Case("e500mc", "e500mc")
|
||||
.Case("e5500", "e5500")
|
||||
|
|
|
@ -1883,18 +1883,6 @@ void Clang::AddPPCTargetArgs(const ArgList &Args,
|
|||
if (T.isOSBinFormatELF()) {
|
||||
switch (getToolChain().getArch()) {
|
||||
case llvm::Triple::ppc64: {
|
||||
// When targeting a processor that supports QPX, or if QPX is
|
||||
// specifically enabled, default to using the ABI that supports QPX (so
|
||||
// long as it is not specifically disabled).
|
||||
bool HasQPX = false;
|
||||
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
|
||||
HasQPX = A->getValue() == StringRef("a2q");
|
||||
HasQPX = Args.hasFlag(options::OPT_mqpx, options::OPT_mno_qpx, HasQPX);
|
||||
if (HasQPX) {
|
||||
ABIName = "elfv1-qpx";
|
||||
break;
|
||||
}
|
||||
|
||||
if (T.isMusl() || (T.isOSFreeBSD() && T.getOSMajorVersion() >= 13))
|
||||
ABIName = "elfv2";
|
||||
else
|
||||
|
|
|
@ -167,12 +167,6 @@
|
|||
// PPCPWR8: "-cc1"
|
||||
// PPCPWR8: "-target-cpu" "pwr8"
|
||||
|
||||
// RUN: %clang -target powerpc64-unknown-linux-gnu \
|
||||
// RUN: -### -S %s -mcpu=a2q 2>&1 | FileCheck -check-prefix=PPCA2Q %s
|
||||
// PPCA2Q: clang
|
||||
// PPCA2Q: "-cc1"
|
||||
// PPCA2Q: "-target-cpu" "a2q"
|
||||
|
||||
// RUN: %clang -target powerpc64-unknown-linux-gnu \
|
||||
// RUN: -### -S %s -mcpu=630 2>&1 | FileCheck -check-prefix=PPC630 %s
|
||||
// PPC630: clang
|
||||
|
|
|
@ -5,14 +5,6 @@
|
|||
// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
|
||||
// RUN: -mabi=elfv1 | FileCheck -check-prefix=CHECK-ELFv1 %s
|
||||
// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
|
||||
// RUN: -mabi=elfv1-qpx | FileCheck -check-prefix=CHECK-ELFv1-QPX %s
|
||||
// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
|
||||
// RUN: -mcpu=a2q | FileCheck -check-prefix=CHECK-ELFv1-QPX %s
|
||||
// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
|
||||
// RUN: -mcpu=a2 -mqpx | FileCheck -check-prefix=CHECK-ELFv1-QPX %s
|
||||
// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
|
||||
// RUN: -mcpu=a2q -mno-qpx | FileCheck -check-prefix=CHECK-ELFv1 %s
|
||||
// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
|
||||
// RUN: -mabi=elfv2 | FileCheck -check-prefix=CHECK-ELFv2-BE %s
|
||||
|
||||
// RUN: %clang -target powerpc64le-unknown-linux-gnu %s -### -o %t.o 2>&1 \
|
||||
|
@ -34,8 +26,6 @@
|
|||
// CHECK-ELFv1: "-target-abi" "elfv1"
|
||||
// CHECK-ELFv1-LE: "-mrelocation-model" "static"
|
||||
// CHECK-ELFv1-LE: "-target-abi" "elfv1"
|
||||
// CHECK-ELFv1-QPX: "-mrelocation-model" "static"
|
||||
// CHECK-ELFv1-QPX: "-target-abi" "elfv1-qpx"
|
||||
// CHECK-ELFv2: "-mrelocation-model" "static"
|
||||
// CHECK-ELFv2: "-target-abi" "elfv2"
|
||||
// CHECK-ELFv2-BE: "-mrelocation-model" "static"
|
||||
|
@ -48,14 +38,6 @@
|
|||
// RUN: %clang -fPIC -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
|
||||
// RUN: -mabi=elfv1 | FileCheck -check-prefix=CHECK-ELFv1-PIC %s
|
||||
// RUN: %clang -fPIC -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
|
||||
// RUN: -mabi=elfv1-qpx | FileCheck -check-prefix=CHECK-ELFv1-QPX-PIC %s
|
||||
// RUN: %clang -fPIC -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
|
||||
// RUN: -mcpu=a2q | FileCheck -check-prefix=CHECK-ELFv1-QPX-PIC %s
|
||||
// RUN: %clang -fPIC -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
|
||||
// RUN: -mcpu=a2 -mqpx | FileCheck -check-prefix=CHECK-ELFv1-QPX-PIC %s
|
||||
// RUN: %clang -fPIC -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
|
||||
// RUN: -mcpu=a2q -mno-qpx | FileCheck -check-prefix=CHECK-ELFv1-PIC %s
|
||||
// RUN: %clang -fPIC -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
|
||||
// RUN: -mabi=elfv2 | FileCheck -check-prefix=CHECK-ELFv2-PIC %s
|
||||
|
||||
// RUN: %clang -fPIC -target powerpc64le-unknown-linux-gnu %s -### -o %t.o 2>&1 \
|
||||
|
@ -69,8 +51,6 @@
|
|||
|
||||
// CHECK-ELFv1-PIC: "-mrelocation-model" "pic" "-pic-level" "2"
|
||||
// CHECK-ELFv1-PIC: "-target-abi" "elfv1"
|
||||
// CHECK-ELFv1-QPX-PIC: "-mrelocation-model" "pic" "-pic-level" "2"
|
||||
// CHECK-ELFv1-QPX-PIC: "-target-abi" "elfv1-qpx"
|
||||
// CHECK-ELFv2-PIC: "-mrelocation-model" "pic" "-pic-level" "2"
|
||||
// CHECK-ELFv2-PIC: "-target-abi" "elfv2"
|
||||
|
||||
|
|
|
@ -79,7 +79,7 @@
|
|||
// PPC: error: unknown target CPU 'not-a-cpu'
|
||||
// PPC: note: valid target CPU values are: generic, 440, 450, 601, 602, 603,
|
||||
// PPC-SAME: 603e, 603ev, 604, 604e, 620, 630, g3, 7400, g4, 7450, g4+, 750,
|
||||
// PPC-SAME: 8548, 970, g5, a2, a2q, e500, e500mc, e5500, power3, pwr3, power4,
|
||||
// PPC-SAME: 8548, 970, g5, a2, e500, e500mc, e5500, power3, pwr3, power4,
|
||||
// PPC-SAME: pwr4, power5, pwr5, power5x, pwr5x, power6, pwr6, power6x, pwr6x,
|
||||
// PPC-SAME: power7, pwr7, power8, pwr8, power9, pwr9, power10, pwr10, powerpc, ppc, powerpc64,
|
||||
// PPC-SAME: ppc64, powerpc64le, ppc64le, future
|
||||
|
|
|
@ -408,21 +408,6 @@
|
|||
// PPC64LE:#define __ppc64__ 1
|
||||
// PPC64LE:#define __ppc__ 1
|
||||
//
|
||||
// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu a2q -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCA2Q %s
|
||||
//
|
||||
// PPCA2Q:#define _ARCH_A2 1
|
||||
// PPCA2Q:#define _ARCH_A2Q 1
|
||||
// PPCA2Q:#define _ARCH_PPC 1
|
||||
// PPCA2Q:#define _ARCH_PPC64 1
|
||||
// PPCA2Q:#define _ARCH_QP 1
|
||||
//
|
||||
// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-bgq-linux -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCBGQ %s
|
||||
//
|
||||
// PPCBGQ:#define __THW_BLUEGENE__ 1
|
||||
// PPCBGQ:#define __TOS_BGQ__ 1
|
||||
// PPCBGQ:#define __bg__ 1
|
||||
// PPCBGQ:#define __bgq__ 1
|
||||
//
|
||||
// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu 630 -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPC630 %s
|
||||
//
|
||||
// PPC630:#define _ARCH_630 1
|
||||
|
@ -1069,7 +1054,6 @@
|
|||
|
||||
// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64-unknown-linux-gnu < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv1 %s
|
||||
// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64-unknown-linux-gnu -target-abi elfv1 < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv1 %s
|
||||
// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64-unknown-linux-gnu -target-abi elfv1-qpx < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv1 %s
|
||||
// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64-unknown-linux-gnu -target-abi elfv2 < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv2 %s
|
||||
// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64le-unknown-linux-gnu < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv2 %s
|
||||
// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64le-unknown-linux-gnu -target-abi elfv1 < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv1 %s
|
||||
|
|
|
@ -4310,14 +4310,9 @@ PowerPC:
|
|||
- ``r``: A 32 or 64-bit integer register.
|
||||
- ``b``: A 32 or 64-bit integer register, excluding ``R0`` (that is:
|
||||
``R1-R31``).
|
||||
- ``f``: A 32 or 64-bit float register (``F0-F31``), or when QPX is enabled, a
|
||||
128 or 256-bit QPX register (``Q0-Q31``; aliases the ``F`` registers).
|
||||
- ``v``: For ``4 x f32`` or ``4 x f64`` types, when QPX is enabled, a
|
||||
128 or 256-bit QPX register (``Q0-Q31``), otherwise a 128-bit
|
||||
altivec vector register (``V0-V31``).
|
||||
|
||||
.. FIXME: is this a bug that v accepts QPX registers? I think this
|
||||
is supposed to only use the altivec vector registers?
|
||||
- ``f``: A 32 or 64-bit float register (``F0-F31``).
|
||||
- ``v``: For ``4 x f32`` or ``4 x f64`` types, a 128-bit altivec vector
|
||||
register (``V0-V31``).
|
||||
|
||||
- ``y``: Condition register (``CR0-CR7``).
|
||||
- ``wc``: An individual CR bit in a CR register.
|
||||
|
|
|
@ -142,8 +142,6 @@ public:
|
|||
Apple,
|
||||
PC,
|
||||
SCEI,
|
||||
BGP,
|
||||
BGQ,
|
||||
Freescale,
|
||||
IBM,
|
||||
ImaginationTechnologies,
|
||||
|
@ -179,7 +177,6 @@ public:
|
|||
Minix,
|
||||
RTEMS,
|
||||
NaCl, // Native Client
|
||||
CNK, // BG/P Compute-Node Kernel
|
||||
AIX,
|
||||
CUDA, // NVIDIA CUDA
|
||||
NVCL, // NVIDIA OpenCL
|
||||
|
|
|
@ -1109,182 +1109,6 @@ def int_ppc_vsx_xxblendvd: GCCBuiltin<"__builtin_vsx_xxblendvd">,
|
|||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC QPX Intrinsics.
|
||||
//
|
||||
|
||||
let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
|
||||
/// PowerPC_QPX_Intrinsic - Base class for all QPX intrinsics.
|
||||
class PowerPC_QPX_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
|
||||
list<LLVMType> param_types,
|
||||
list<IntrinsicProperty> properties>
|
||||
: GCCBuiltin<!strconcat("__builtin_qpx_", GCCIntSuffix)>,
|
||||
Intrinsic<ret_types, param_types, properties>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC QPX Intrinsic Class Definitions.
|
||||
//
|
||||
|
||||
/// PowerPC_QPX_FF_Intrinsic - A PowerPC intrinsic that takes one v4f64
|
||||
/// vector and returns one. These intrinsics have no side effects.
|
||||
class PowerPC_QPX_FF_Intrinsic<string GCCIntSuffix>
|
||||
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
|
||||
[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
|
||||
|
||||
/// PowerPC_QPX_FFF_Intrinsic - A PowerPC intrinsic that takes two v4f64
|
||||
/// vectors and returns one. These intrinsics have no side effects.
|
||||
class PowerPC_QPX_FFF_Intrinsic<string GCCIntSuffix>
|
||||
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
|
||||
[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
/// PowerPC_QPX_FFFF_Intrinsic - A PowerPC intrinsic that takes three v4f64
|
||||
/// vectors and returns one. These intrinsics have no side effects.
|
||||
class PowerPC_QPX_FFFF_Intrinsic<string GCCIntSuffix>
|
||||
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
|
||||
[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
/// PowerPC_QPX_Load_Intrinsic - A PowerPC intrinsic that takes a pointer
|
||||
/// and returns a v4f64.
|
||||
class PowerPC_QPX_Load_Intrinsic<string GCCIntSuffix>
|
||||
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
|
||||
[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
|
||||
|
||||
/// PowerPC_QPX_LoadPerm_Intrinsic - A PowerPC intrinsic that takes a pointer
|
||||
/// and returns a v4f64 permutation.
|
||||
class PowerPC_QPX_LoadPerm_Intrinsic<string GCCIntSuffix>
|
||||
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
|
||||
[llvm_v4f64_ty], [llvm_ptr_ty], [IntrNoMem]>;
|
||||
|
||||
/// PowerPC_QPX_Store_Intrinsic - A PowerPC intrinsic that takes a pointer
|
||||
/// and stores a v4f64.
|
||||
class PowerPC_QPX_Store_Intrinsic<string GCCIntSuffix>
|
||||
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
|
||||
[], [llvm_v4f64_ty, llvm_ptr_ty],
|
||||
[IntrWriteMem, IntrArgMemOnly]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC QPX Intrinsic Definitions.
|
||||
|
||||
let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
|
||||
// Add Instructions
|
||||
def int_ppc_qpx_qvfadd : PowerPC_QPX_FFF_Intrinsic<"qvfadd">;
|
||||
def int_ppc_qpx_qvfadds : PowerPC_QPX_FFF_Intrinsic<"qvfadds">;
|
||||
def int_ppc_qpx_qvfsub : PowerPC_QPX_FFF_Intrinsic<"qvfsub">;
|
||||
def int_ppc_qpx_qvfsubs : PowerPC_QPX_FFF_Intrinsic<"qvfsubs">;
|
||||
|
||||
// Estimate Instructions
|
||||
def int_ppc_qpx_qvfre : PowerPC_QPX_FF_Intrinsic<"qvfre">;
|
||||
def int_ppc_qpx_qvfres : PowerPC_QPX_FF_Intrinsic<"qvfres">;
|
||||
def int_ppc_qpx_qvfrsqrte : PowerPC_QPX_FF_Intrinsic<"qvfrsqrte">;
|
||||
def int_ppc_qpx_qvfrsqrtes : PowerPC_QPX_FF_Intrinsic<"qvfrsqrtes">;
|
||||
|
||||
// Multiply Instructions
|
||||
def int_ppc_qpx_qvfmul : PowerPC_QPX_FFF_Intrinsic<"qvfmul">;
|
||||
def int_ppc_qpx_qvfmuls : PowerPC_QPX_FFF_Intrinsic<"qvfmuls">;
|
||||
def int_ppc_qpx_qvfxmul : PowerPC_QPX_FFF_Intrinsic<"qvfxmul">;
|
||||
def int_ppc_qpx_qvfxmuls : PowerPC_QPX_FFF_Intrinsic<"qvfxmuls">;
|
||||
|
||||
// Multiply-add instructions
|
||||
def int_ppc_qpx_qvfmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfmadd">;
|
||||
def int_ppc_qpx_qvfmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfmadds">;
|
||||
def int_ppc_qpx_qvfnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadd">;
|
||||
def int_ppc_qpx_qvfnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadds">;
|
||||
def int_ppc_qpx_qvfmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfmsub">;
|
||||
def int_ppc_qpx_qvfmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfmsubs">;
|
||||
def int_ppc_qpx_qvfnmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsub">;
|
||||
def int_ppc_qpx_qvfnmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsubs">;
|
||||
def int_ppc_qpx_qvfxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadd">;
|
||||
def int_ppc_qpx_qvfxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadds">;
|
||||
def int_ppc_qpx_qvfxxnpmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadd">;
|
||||
def int_ppc_qpx_qvfxxnpmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadds">;
|
||||
def int_ppc_qpx_qvfxxcpnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadd">;
|
||||
def int_ppc_qpx_qvfxxcpnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadds">;
|
||||
def int_ppc_qpx_qvfxxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadd">;
|
||||
def int_ppc_qpx_qvfxxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadds">;
|
||||
|
||||
// Select Instruction
|
||||
def int_ppc_qpx_qvfsel : PowerPC_QPX_FFFF_Intrinsic<"qvfsel">;
|
||||
|
||||
// Permute Instruction
|
||||
def int_ppc_qpx_qvfperm : PowerPC_QPX_FFFF_Intrinsic<"qvfperm">;
|
||||
|
||||
// Convert and Round Instructions
|
||||
def int_ppc_qpx_qvfctid : PowerPC_QPX_FF_Intrinsic<"qvfctid">;
|
||||
def int_ppc_qpx_qvfctidu : PowerPC_QPX_FF_Intrinsic<"qvfctidu">;
|
||||
def int_ppc_qpx_qvfctidz : PowerPC_QPX_FF_Intrinsic<"qvfctidz">;
|
||||
def int_ppc_qpx_qvfctiduz : PowerPC_QPX_FF_Intrinsic<"qvfctiduz">;
|
||||
def int_ppc_qpx_qvfctiw : PowerPC_QPX_FF_Intrinsic<"qvfctiw">;
|
||||
def int_ppc_qpx_qvfctiwu : PowerPC_QPX_FF_Intrinsic<"qvfctiwu">;
|
||||
def int_ppc_qpx_qvfctiwz : PowerPC_QPX_FF_Intrinsic<"qvfctiwz">;
|
||||
def int_ppc_qpx_qvfctiwuz : PowerPC_QPX_FF_Intrinsic<"qvfctiwuz">;
|
||||
def int_ppc_qpx_qvfcfid : PowerPC_QPX_FF_Intrinsic<"qvfcfid">;
|
||||
def int_ppc_qpx_qvfcfidu : PowerPC_QPX_FF_Intrinsic<"qvfcfidu">;
|
||||
def int_ppc_qpx_qvfcfids : PowerPC_QPX_FF_Intrinsic<"qvfcfids">;
|
||||
def int_ppc_qpx_qvfcfidus : PowerPC_QPX_FF_Intrinsic<"qvfcfidus">;
|
||||
def int_ppc_qpx_qvfrsp : PowerPC_QPX_FF_Intrinsic<"qvfrsp">;
|
||||
def int_ppc_qpx_qvfriz : PowerPC_QPX_FF_Intrinsic<"qvfriz">;
|
||||
def int_ppc_qpx_qvfrin : PowerPC_QPX_FF_Intrinsic<"qvfrin">;
|
||||
def int_ppc_qpx_qvfrip : PowerPC_QPX_FF_Intrinsic<"qvfrip">;
|
||||
def int_ppc_qpx_qvfrim : PowerPC_QPX_FF_Intrinsic<"qvfrim">;
|
||||
|
||||
// Move Instructions
|
||||
def int_ppc_qpx_qvfneg : PowerPC_QPX_FF_Intrinsic<"qvfneg">;
|
||||
def int_ppc_qpx_qvfabs : PowerPC_QPX_FF_Intrinsic<"qvfabs">;
|
||||
def int_ppc_qpx_qvfnabs : PowerPC_QPX_FF_Intrinsic<"qvfnabs">;
|
||||
def int_ppc_qpx_qvfcpsgn : PowerPC_QPX_FFF_Intrinsic<"qvfcpsgn">;
|
||||
|
||||
// Compare Instructions
|
||||
def int_ppc_qpx_qvftstnan : PowerPC_QPX_FFF_Intrinsic<"qvftstnan">;
|
||||
def int_ppc_qpx_qvfcmplt : PowerPC_QPX_FFF_Intrinsic<"qvfcmplt">;
|
||||
def int_ppc_qpx_qvfcmpgt : PowerPC_QPX_FFF_Intrinsic<"qvfcmpgt">;
|
||||
def int_ppc_qpx_qvfcmpeq : PowerPC_QPX_FFF_Intrinsic<"qvfcmpeq">;
|
||||
|
||||
// Load instructions
|
||||
def int_ppc_qpx_qvlfd : PowerPC_QPX_Load_Intrinsic<"qvlfd">;
|
||||
def int_ppc_qpx_qvlfda : PowerPC_QPX_Load_Intrinsic<"qvlfda">;
|
||||
def int_ppc_qpx_qvlfs : PowerPC_QPX_Load_Intrinsic<"qvlfs">;
|
||||
def int_ppc_qpx_qvlfsa : PowerPC_QPX_Load_Intrinsic<"qvlfsa">;
|
||||
|
||||
def int_ppc_qpx_qvlfcda : PowerPC_QPX_Load_Intrinsic<"qvlfcda">;
|
||||
def int_ppc_qpx_qvlfcd : PowerPC_QPX_Load_Intrinsic<"qvlfcd">;
|
||||
def int_ppc_qpx_qvlfcsa : PowerPC_QPX_Load_Intrinsic<"qvlfcsa">;
|
||||
def int_ppc_qpx_qvlfcs : PowerPC_QPX_Load_Intrinsic<"qvlfcs">;
|
||||
def int_ppc_qpx_qvlfiwaa : PowerPC_QPX_Load_Intrinsic<"qvlfiwaa">;
|
||||
def int_ppc_qpx_qvlfiwa : PowerPC_QPX_Load_Intrinsic<"qvlfiwa">;
|
||||
def int_ppc_qpx_qvlfiwza : PowerPC_QPX_Load_Intrinsic<"qvlfiwza">;
|
||||
def int_ppc_qpx_qvlfiwz : PowerPC_QPX_Load_Intrinsic<"qvlfiwz">;
|
||||
|
||||
def int_ppc_qpx_qvlpcld : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcld">;
|
||||
def int_ppc_qpx_qvlpcls : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcls">;
|
||||
def int_ppc_qpx_qvlpcrd : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrd">;
|
||||
def int_ppc_qpx_qvlpcrs : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrs">;
|
||||
|
||||
// Store instructions
|
||||
def int_ppc_qpx_qvstfd : PowerPC_QPX_Store_Intrinsic<"qvstfd">;
|
||||
def int_ppc_qpx_qvstfda : PowerPC_QPX_Store_Intrinsic<"qvstfda">;
|
||||
def int_ppc_qpx_qvstfs : PowerPC_QPX_Store_Intrinsic<"qvstfs">;
|
||||
def int_ppc_qpx_qvstfsa : PowerPC_QPX_Store_Intrinsic<"qvstfsa">;
|
||||
|
||||
def int_ppc_qpx_qvstfcda : PowerPC_QPX_Store_Intrinsic<"qvstfcda">;
|
||||
def int_ppc_qpx_qvstfcd : PowerPC_QPX_Store_Intrinsic<"qvstfcd">;
|
||||
def int_ppc_qpx_qvstfcsa : PowerPC_QPX_Store_Intrinsic<"qvstfcsa">;
|
||||
def int_ppc_qpx_qvstfcs : PowerPC_QPX_Store_Intrinsic<"qvstfcs">;
|
||||
def int_ppc_qpx_qvstfiwa : PowerPC_QPX_Store_Intrinsic<"qvstfiwa">;
|
||||
def int_ppc_qpx_qvstfiw : PowerPC_QPX_Store_Intrinsic<"qvstfiw">;
|
||||
|
||||
// Logical and permutation formation
|
||||
def int_ppc_qpx_qvflogical : PowerPC_QPX_Intrinsic<"qvflogical",
|
||||
[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_ppc_qpx_qvgpci : PowerPC_QPX_Intrinsic<"qvgpci",
|
||||
[llvm_v4f64_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC HTM Intrinsic Definitions.
|
||||
|
||||
|
|
|
@ -160,8 +160,6 @@ StringRef Triple::getVendorTypeName(VendorType Kind) {
|
|||
|
||||
case AMD: return "amd";
|
||||
case Apple: return "apple";
|
||||
case BGP: return "bgp";
|
||||
case BGQ: return "bgq";
|
||||
case CSR: return "csr";
|
||||
case Freescale: return "fsl";
|
||||
case IBM: return "ibm";
|
||||
|
@ -187,7 +185,6 @@ StringRef Triple::getOSTypeName(OSType Kind) {
|
|||
case AMDHSA: return "amdhsa";
|
||||
case AMDPAL: return "amdpal";
|
||||
case Ananas: return "ananas";
|
||||
case CNK: return "cnk";
|
||||
case CUDA: return "cuda";
|
||||
case CloudABI: return "cloudabi";
|
||||
case Contiki: return "contiki";
|
||||
|
@ -470,8 +467,6 @@ static Triple::VendorType parseVendor(StringRef VendorName) {
|
|||
.Case("apple", Triple::Apple)
|
||||
.Case("pc", Triple::PC)
|
||||
.Case("scei", Triple::SCEI)
|
||||
.Case("bgp", Triple::BGP)
|
||||
.Case("bgq", Triple::BGQ)
|
||||
.Case("fsl", Triple::Freescale)
|
||||
.Case("ibm", Triple::IBM)
|
||||
.Case("img", Triple::ImaginationTechnologies)
|
||||
|
@ -508,7 +503,6 @@ static Triple::OSType parseOS(StringRef OSName) {
|
|||
.StartsWith("minix", Triple::Minix)
|
||||
.StartsWith("rtems", Triple::RTEMS)
|
||||
.StartsWith("nacl", Triple::NaCl)
|
||||
.StartsWith("cnk", Triple::CNK)
|
||||
.StartsWith("aix", Triple::AIX)
|
||||
.StartsWith("cuda", Triple::CUDA)
|
||||
.StartsWith("nvcl", Triple::NVCL)
|
||||
|
|
|
@ -492,21 +492,6 @@ public:
|
|||
Inst.addOperand(MCOperand::createReg(VSSRegs[getVSReg()]));
|
||||
}
|
||||
|
||||
void addRegQFRCOperands(MCInst &Inst, unsigned N) const {
|
||||
assert(N == 1 && "Invalid number of operands!");
|
||||
Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
|
||||
}
|
||||
|
||||
void addRegQSRCOperands(MCInst &Inst, unsigned N) const {
|
||||
assert(N == 1 && "Invalid number of operands!");
|
||||
Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
|
||||
}
|
||||
|
||||
void addRegQBRCOperands(MCInst &Inst, unsigned N) const {
|
||||
assert(N == 1 && "Invalid number of operands!");
|
||||
Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
|
||||
}
|
||||
|
||||
void addRegSPE4RCOperands(MCInst &Inst, unsigned N) const {
|
||||
assert(N == 1 && "Invalid number of operands!");
|
||||
Inst.addOperand(MCOperand::createReg(RRegs[getReg()]));
|
||||
|
@ -1207,9 +1192,6 @@ bool PPCAsmParser::MatchRegisterName(unsigned &RegNo, int64_t &IntVal) {
|
|||
} else if (Name.startswith_lower("v") &&
|
||||
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
|
||||
RegNo = VRegs[IntVal];
|
||||
} else if (Name.startswith_lower("q") &&
|
||||
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
|
||||
RegNo = QFRegs[IntVal];
|
||||
} else if (Name.startswith_lower("cr") &&
|
||||
!Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) {
|
||||
RegNo = CRRegs[IntVal];
|
||||
|
|
|
@ -36,7 +36,6 @@ add_llvm_target(PowerPCCodeGen
|
|||
PPCMacroFusion.cpp
|
||||
PPCMIPeephole.cpp
|
||||
PPCRegisterInfo.cpp
|
||||
PPCQPXLoadSplat.cpp
|
||||
PPCSubtarget.cpp
|
||||
PPCTargetMachine.cpp
|
||||
PPCTargetObjectFile.cpp
|
||||
|
|
|
@ -167,12 +167,6 @@ static DecodeStatus DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo,
|
|||
#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
|
||||
#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass
|
||||
|
||||
static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
|
||||
uint64_t Address,
|
||||
const void *Decoder) {
|
||||
return decodeRegisterClass(Inst, RegNo, QFRegs);
|
||||
}
|
||||
|
||||
static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo,
|
||||
uint64_t Address,
|
||||
const void *Decoder) {
|
||||
|
@ -401,14 +395,9 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
|
|||
// Read the instruction in the proper endianness.
|
||||
uint64_t Inst = ReadFunc(Bytes.data());
|
||||
|
||||
if (STI.getFeatureBits()[PPC::FeatureQPX]) {
|
||||
if (STI.getFeatureBits()[PPC::FeatureSPE]) {
|
||||
DecodeStatus result =
|
||||
decodeInstruction(DecoderTableQPX32, MI, Inst, Address, this, STI);
|
||||
if (result != MCDisassembler::Fail)
|
||||
return result;
|
||||
} else if (STI.getFeatureBits()[PPC::FeatureSPE]) {
|
||||
DecodeStatus result =
|
||||
decodeInstruction(DecoderTableSPE32, MI, Inst, Address, this, STI);
|
||||
decodeInstruction(DecoderTableSPE32, MI, Inst, Address, this, STI);
|
||||
if (result != MCDisassembler::Fail)
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -49,18 +49,6 @@ FullRegNamesWithPercent("ppc-reg-with-percent-prefix", cl::Hidden,
|
|||
|
||||
void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
|
||||
const char *RegName = getRegisterName(RegNo);
|
||||
if (RegName[0] == 'q' /* QPX */) {
|
||||
// The system toolchain on the BG/Q does not understand QPX register names
|
||||
// in .cfi_* directives, so print the name of the floating-point
|
||||
// subregister instead.
|
||||
std::string RN(RegName);
|
||||
|
||||
RN[0] = 'f';
|
||||
OS << RN;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
OS << RegName;
|
||||
}
|
||||
|
||||
|
|
|
@ -159,7 +159,6 @@ using llvm::MCPhysReg;
|
|||
static const MCPhysReg SPERegs[32] = PPC_REGS0_31(PPC::S); \
|
||||
static const MCPhysReg VFRegs[32] = PPC_REGS0_31(PPC::VF); \
|
||||
static const MCPhysReg VRegs[32] = PPC_REGS0_31(PPC::V); \
|
||||
static const MCPhysReg QFRegs[32] = PPC_REGS0_31(PPC::QF); \
|
||||
static const MCPhysReg RRegsNoR0[32] = \
|
||||
PPC_REGS_NO0_31(PPC::ZERO, PPC::R); \
|
||||
static const MCPhysReg XRegsNoX0[32] = \
|
||||
|
|
|
@ -44,7 +44,6 @@ namespace llvm {
|
|||
FunctionPass *createPPCMIPeepholePass();
|
||||
FunctionPass *createPPCBranchSelectionPass();
|
||||
FunctionPass *createPPCBranchCoalescingPass();
|
||||
FunctionPass *createPPCQPXLoadSplatPass();
|
||||
FunctionPass *createPPCISelDag(PPCTargetMachine &TM, CodeGenOpt::Level OL);
|
||||
FunctionPass *createPPCTLSDynamicCallPass();
|
||||
FunctionPass *createPPCBoolRetToIntPass();
|
||||
|
@ -68,7 +67,6 @@ namespace llvm {
|
|||
void initializePPCReduceCRLogicalsPass(PassRegistry&);
|
||||
void initializePPCBSelPass(PassRegistry&);
|
||||
void initializePPCBranchCoalescingPass(PassRegistry&);
|
||||
void initializePPCQPXLoadSplatPass(PassRegistry&);
|
||||
void initializePPCBoolRetToIntPass(PassRegistry&);
|
||||
void initializePPCExpandISELPass(PassRegistry &);
|
||||
void initializePPCPreEmitPeepholePass(PassRegistry &);
|
||||
|
|
|
@ -132,9 +132,6 @@ def FeaturePPC4xx : SubtargetFeature<"ppc4xx", "IsPPC4xx", "true",
|
|||
"Enable PPC 4xx instructions">;
|
||||
def FeaturePPC6xx : SubtargetFeature<"ppc6xx", "IsPPC6xx", "true",
|
||||
"Enable PPC 6xx instructions">;
|
||||
def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
|
||||
"Enable QPX instructions",
|
||||
[FeatureFPU]>;
|
||||
def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
|
||||
"Enable VSX instructions",
|
||||
[FeatureAltivec]>;
|
||||
|
@ -193,7 +190,7 @@ def FeatureFloat128 :
|
|||
def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD",
|
||||
"POPCNTD_Fast",
|
||||
"Enable the popcnt[dw] instructions">;
|
||||
// Note that for the a2/a2q processor models we should not use popcnt[dw] by
|
||||
// Note that for the a2 processor models we should not use popcnt[dw] by
|
||||
// default. These processors do support the instructions, but they're
|
||||
// microcoded, and the software emulation is about twice as fast.
|
||||
def FeatureSlowPOPCNTD : SubtargetFeature<"slow-popcntd","HasPOPCNTD",
|
||||
|
@ -514,15 +511,6 @@ def : ProcessorModel<"a2", PPCA2Model,
|
|||
FeatureFPRND, FeatureFPCVT, FeatureISEL,
|
||||
FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX,
|
||||
Feature64Bit /*, Feature64BitRegs */, FeatureMFTB]>;
|
||||
def : ProcessorModel<"a2q", PPCA2Model,
|
||||
[DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF,
|
||||
FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
|
||||
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
|
||||
FeatureSTFIWX, FeatureLFIWAX,
|
||||
FeatureFPRND, FeatureFPCVT, FeatureISEL,
|
||||
FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX,
|
||||
Feature64Bit /*, Feature64BitRegs */, FeatureQPX,
|
||||
FeatureMFTB]>;
|
||||
def : ProcessorModel<"pwr3", G5Model,
|
||||
[DirectivePwr3, FeatureAltivec,
|
||||
FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF,
|
||||
|
|
|
@ -549,9 +549,6 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
|
|||
if (Subtarget->hasSPE()) {
|
||||
if (PPC::F4RCRegClass.contains(Reg) ||
|
||||
PPC::F8RCRegClass.contains(Reg) ||
|
||||
PPC::QBRCRegClass.contains(Reg) ||
|
||||
PPC::QFRCRegClass.contains(Reg) ||
|
||||
PPC::QSRCRegClass.contains(Reg) ||
|
||||
PPC::VFRCRegClass.contains(Reg) ||
|
||||
PPC::VRRCRegClass.contains(Reg) ||
|
||||
PPC::VSFRCRegClass.contains(Reg) ||
|
||||
|
|
|
@ -61,9 +61,6 @@ def RetCC_PPC_Cold : CallingConv<[
|
|||
CCIfType<[f64], CCAssignToReg<[F1]>>,
|
||||
CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToReg<[V2]>>>,
|
||||
|
||||
CCIfType<[v4f64, v4f32, v4i1],
|
||||
CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1]>>>,
|
||||
|
||||
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
|
||||
CCIfSubtarget<"hasAltivec()",
|
||||
CCAssignToReg<[V2]>>>
|
||||
|
@ -98,10 +95,6 @@ def RetCC_PPC : CallingConv<[
|
|||
CCIfSubtarget<"hasP9Vector()",
|
||||
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
|
||||
|
||||
// QPX vectors are returned in QF1 and QF2.
|
||||
CCIfType<[v4f64, v4f32, v4i1],
|
||||
CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
|
||||
|
||||
// Vector types returned as "direct" go into V2 .. V9; note that only the
|
||||
// ELFv2 ABI fully utilizes all these registers.
|
||||
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
|
||||
|
@ -158,8 +151,6 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
|
|||
CCIfType<[f128],
|
||||
CCIfSubtarget<"hasP9Vector()",
|
||||
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
|
||||
CCIfType<[v4f64, v4f32, v4i1],
|
||||
CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
|
||||
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
|
||||
CCIfSubtarget<"hasAltivec()",
|
||||
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>
|
||||
|
@ -223,9 +214,6 @@ def CC_PPC32_SVR4_Common : CallingConv<[
|
|||
CCIfType<[f32], CCIfSubtarget<"hasSPE()", CCAssignToStack<4, 4>>>,
|
||||
CCIfType<[f64], CCIfSubtarget<"hasSPE()", CCAssignToStack<8, 8>>>,
|
||||
|
||||
// QPX vectors that are stored in double precision need 32-byte alignment.
|
||||
CCIfType<[v4f64, v4i1], CCAssignToStack<32, 32>>,
|
||||
|
||||
// Vectors and float128 get 16-byte stack slots that are 16-byte aligned.
|
||||
CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>>,
|
||||
CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToStack<16, 16>>>
|
||||
|
@ -243,10 +231,6 @@ def CC_PPC32_SVR4_VarArg : CallingConv<[
|
|||
// put vector arguments in vector registers before putting them on the stack.
|
||||
let Entry = 1 in
|
||||
def CC_PPC32_SVR4 : CallingConv<[
|
||||
// QPX vectors mirror the scalar FP convention.
|
||||
CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()",
|
||||
CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>,
|
||||
|
||||
// The first 12 Vector arguments are passed in AltiVec registers.
|
||||
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
|
||||
CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,
|
||||
|
|
|
@ -4142,7 +4142,7 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
|
|||
// Altivec Vector compare instructions do not set any CR register by default and
|
||||
// vector compare operations return the same type as the operands.
|
||||
if (LHS.getValueType().isVector()) {
|
||||
if (Subtarget->hasQPX() || Subtarget->hasSPE())
|
||||
if (Subtarget->hasSPE())
|
||||
return false;
|
||||
|
||||
EVT VecVT = LHS.getValueType();
|
||||
|
@ -4813,8 +4813,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
|
|||
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
|
||||
switch (LoadedVT.getSimpleVT().SimpleTy) {
|
||||
default: llvm_unreachable("Invalid PPC load type!");
|
||||
case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
|
||||
case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
|
||||
case MVT::f64: Opcode = PPC::LFDUX; break;
|
||||
case MVT::f32: Opcode = PPC::LFSUX; break;
|
||||
case MVT::i32: Opcode = PPC::LWZUX; break;
|
||||
|
@ -5095,12 +5093,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
|
|||
SelectCCOp = PPC::SELECT_CC_F16;
|
||||
else if (Subtarget->hasSPE())
|
||||
SelectCCOp = PPC::SELECT_CC_SPE;
|
||||
else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
|
||||
SelectCCOp = PPC::SELECT_CC_QFRC;
|
||||
else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
|
||||
SelectCCOp = PPC::SELECT_CC_QSRC;
|
||||
else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
|
||||
SelectCCOp = PPC::SELECT_CC_QBRC;
|
||||
else if (N->getValueType(0) == MVT::v2f64 ||
|
||||
N->getValueType(0) == MVT::v2i64)
|
||||
SelectCCOp = PPC::SELECT_CC_VSRC;
|
||||
|
@ -5856,9 +5848,6 @@ void PPCDAGToDAGISel::PeepholeCROps() {
|
|||
case PPC::SELECT_I8:
|
||||
case PPC::SELECT_F4:
|
||||
case PPC::SELECT_F8:
|
||||
case PPC::SELECT_QFRC:
|
||||
case PPC::SELECT_QSRC:
|
||||
case PPC::SELECT_QBRC:
|
||||
case PPC::SELECT_SPE:
|
||||
case PPC::SELECT_SPE4:
|
||||
case PPC::SELECT_VRRC:
|
||||
|
@ -6177,9 +6166,6 @@ void PPCDAGToDAGISel::PeepholeCROps() {
|
|||
case PPC::SELECT_I8:
|
||||
case PPC::SELECT_F4:
|
||||
case PPC::SELECT_F8:
|
||||
case PPC::SELECT_QFRC:
|
||||
case PPC::SELECT_QSRC:
|
||||
case PPC::SELECT_QBRC:
|
||||
case PPC::SELECT_SPE:
|
||||
case PPC::SELECT_SPE4:
|
||||
case PPC::SELECT_VRRC:
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -427,22 +427,6 @@ namespace llvm {
|
|||
/// => VABSDUW((XVNEGSP a), (XVNEGSP b))
|
||||
VABSD,
|
||||
|
||||
/// QVFPERM = This corresponds to the QPX qvfperm instruction.
|
||||
QVFPERM,
|
||||
|
||||
/// QVGPCI = This corresponds to the QPX qvgpci instruction.
|
||||
QVGPCI,
|
||||
|
||||
/// QVALIGNI = This corresponds to the QPX qvaligni instruction.
|
||||
QVALIGNI,
|
||||
|
||||
/// QVESPLATI = This corresponds to the QPX qvesplati instruction.
|
||||
QVESPLATI,
|
||||
|
||||
/// QBFLT = Access the underlying QPX floating-point boolean
|
||||
/// representation.
|
||||
QBFLT,
|
||||
|
||||
/// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or
|
||||
/// lower (IDX=1) half of v4f32 to v2f64.
|
||||
FP_EXTEND_HALF,
|
||||
|
@ -519,10 +503,6 @@ namespace llvm {
|
|||
/// Store scalar integers from VSR.
|
||||
ST_VSR_SCAL_INT,
|
||||
|
||||
/// QBRC, CHAIN = QVLFSb CHAIN, Ptr
|
||||
/// The 4xf32 load used for v4i1 constants.
|
||||
QVLFSb,
|
||||
|
||||
/// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
|
||||
/// except they ensure that the compare input is zero-extended for
|
||||
/// sub-word versions because the atomic loads zero-extend.
|
||||
|
|
|
@ -642,7 +642,6 @@ class XForm_17a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
|
|||
let FRA = 0;
|
||||
}
|
||||
|
||||
// Used for QPX
|
||||
class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
|
||||
InstrItinClass itin, list<dag> pattern>
|
||||
: I<opcode, OOL, IOL, asmstr, itin> {
|
||||
|
@ -1781,14 +1780,6 @@ class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
|
|||
let Inst{31} = 0;
|
||||
}
|
||||
|
||||
// Used for QPX
|
||||
class AForm_4a<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
|
||||
InstrItinClass itin, list<dag> pattern>
|
||||
: AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
|
||||
let FRA = 0;
|
||||
let FRC = 0;
|
||||
}
|
||||
|
||||
// 1.7.13 M-Form
|
||||
class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
|
||||
InstrItinClass itin, list<dag> pattern>
|
||||
|
@ -2099,49 +2090,6 @@ class VX_RD5_RSp5_PS1_XO9<bits<9> xo, dag OOL, dag IOL, string asmstr,
|
|||
let Inst{23-31} = xo;
|
||||
}
|
||||
|
||||
// Z23-Form (used by QPX)
|
||||
class Z23Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
|
||||
InstrItinClass itin, list<dag> pattern>
|
||||
: I<opcode, OOL, IOL, asmstr, itin> {
|
||||
bits<5> FRT;
|
||||
bits<5> FRA;
|
||||
bits<5> FRB;
|
||||
bits<2> idx;
|
||||
|
||||
let Pattern = pattern;
|
||||
|
||||
bit RC = 0; // set by isRecordForm
|
||||
|
||||
let Inst{6-10} = FRT;
|
||||
let Inst{11-15} = FRA;
|
||||
let Inst{16-20} = FRB;
|
||||
let Inst{21-22} = idx;
|
||||
let Inst{23-30} = xo;
|
||||
let Inst{31} = RC;
|
||||
}
|
||||
|
||||
class Z23Form_2<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
|
||||
InstrItinClass itin, list<dag> pattern>
|
||||
: Z23Form_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
|
||||
let FRB = 0;
|
||||
}
|
||||
|
||||
class Z23Form_3<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
|
||||
InstrItinClass itin, list<dag> pattern>
|
||||
: I<opcode, OOL, IOL, asmstr, itin> {
|
||||
bits<5> FRT;
|
||||
bits<12> idx;
|
||||
|
||||
let Pattern = pattern;
|
||||
|
||||
bit RC = 0; // set by isRecordForm
|
||||
|
||||
let Inst{6-10} = FRT;
|
||||
let Inst{11-22} = idx;
|
||||
let Inst{23-30} = xo;
|
||||
let Inst{31} = RC;
|
||||
}
|
||||
|
||||
class Z23Form_8<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
|
||||
InstrItinClass itin, list<dag> pattern>
|
||||
: I<opcode, OOL, IOL, asmstr, itin> {
|
||||
|
|
|
@ -259,16 +259,6 @@ bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
|
|||
case PPC::XVMULDP:
|
||||
case PPC::XVMULSP:
|
||||
case PPC::XSMULSP:
|
||||
// QPX Add:
|
||||
case PPC::QVFADD:
|
||||
case PPC::QVFADDS:
|
||||
case PPC::QVFADDSs:
|
||||
// QPX Multiply:
|
||||
case PPC::QVFMUL:
|
||||
case PPC::QVFMULS:
|
||||
case PPC::QVFMULSs:
|
||||
return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
|
||||
Inst.getFlag(MachineInstr::MIFlag::FmNsz);
|
||||
// Fixed point:
|
||||
// Multiply:
|
||||
case PPC::MULHD:
|
||||
|
@ -300,9 +290,7 @@ static const uint16_t FMAOpIdxInfo[][5] = {
|
|||
{PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2},
|
||||
{PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2},
|
||||
{PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1},
|
||||
{PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1},
|
||||
{PPC::QVFMADDSs, PPC::QVFADDSs, PPC::QVFMULSs, 3, 1},
|
||||
{PPC::QVFMADD, PPC::QVFADD, PPC::QVFMUL, 3, 1}};
|
||||
{PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1}};
|
||||
|
||||
// Check if an opcode is a FMA instruction. If it is, return the index in array
|
||||
// FMAOpIdxInfo. Otherwise, return -1.
|
||||
|
@ -666,7 +654,6 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
|
|||
case PPC::LI8:
|
||||
case PPC::LIS:
|
||||
case PPC::LIS8:
|
||||
case PPC::QVGPCI:
|
||||
case PPC::ADDIStocHA:
|
||||
case PPC::ADDIStocHA8:
|
||||
case PPC::ADDItocL:
|
||||
|
@ -1343,12 +1330,6 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
|
||||
PPC::VSSRCRegClass.contains(DestReg, SrcReg))
|
||||
Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
|
||||
else if (PPC::QFRCRegClass.contains(DestReg, SrcReg))
|
||||
Opc = PPC::QVFMR;
|
||||
else if (PPC::QSRCRegClass.contains(DestReg, SrcReg))
|
||||
Opc = PPC::QVFMRs;
|
||||
else if (PPC::QBRCRegClass.contains(DestReg, SrcReg))
|
||||
Opc = PPC::QVFMRb;
|
||||
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
|
||||
Opc = PPC::CROR;
|
||||
else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
|
||||
|
@ -1393,12 +1374,6 @@ static unsigned getSpillIndex(const TargetRegisterClass *RC) {
|
|||
OpcodeIndex = SOK_VectorFloat4Spill;
|
||||
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
|
||||
OpcodeIndex = SOK_VRSaveSpill;
|
||||
} else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
|
||||
OpcodeIndex = SOK_QuadFloat8Spill;
|
||||
} else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
|
||||
OpcodeIndex = SOK_QuadFloat4Spill;
|
||||
} else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
|
||||
OpcodeIndex = SOK_QuadBitSpill;
|
||||
} else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
|
||||
OpcodeIndex = SOK_SpillToVSR;
|
||||
} else {
|
||||
|
|
|
@ -123,9 +123,6 @@ enum SpillOpcodeKey {
|
|||
SOK_VectorFloat8Spill,
|
||||
SOK_VectorFloat4Spill,
|
||||
SOK_VRSaveSpill,
|
||||
SOK_QuadFloat8Spill,
|
||||
SOK_QuadFloat4Spill,
|
||||
SOK_QuadBitSpill,
|
||||
SOK_SpillToVSR,
|
||||
SOK_SPESpill,
|
||||
SOK_LastOpcodeSpill // This must be last on the enum.
|
||||
|
@ -136,32 +133,28 @@ enum SpillOpcodeKey {
|
|||
{ \
|
||||
PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
|
||||
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX, \
|
||||
PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, PPC::QVLFDXb, \
|
||||
PPC::SPILLTOVSR_LD, PPC::EVLDD \
|
||||
PPC::RESTORE_VRSAVE, PPC::SPILLTOVSR_LD, PPC::EVLDD \
|
||||
}
|
||||
|
||||
#define Pwr9LoadOpcodes \
|
||||
{ \
|
||||
PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
|
||||
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \
|
||||
PPC::DFLOADf32, PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, \
|
||||
PPC::QVLFDXb, PPC::SPILLTOVSR_LD \
|
||||
PPC::DFLOADf32, PPC::RESTORE_VRSAVE, PPC::SPILLTOVSR_LD \
|
||||
}
|
||||
|
||||
#define Pwr8StoreOpcodes \
|
||||
{ \
|
||||
PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
|
||||
PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, PPC::SPILL_VRSAVE, \
|
||||
PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, PPC::SPILLTOVSR_ST, \
|
||||
PPC::EVSTDD \
|
||||
PPC::SPILLTOVSR_ST, PPC::EVSTDD \
|
||||
}
|
||||
|
||||
#define Pwr9StoreOpcodes \
|
||||
{ \
|
||||
PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
|
||||
PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \
|
||||
PPC::SPILL_VRSAVE, PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, \
|
||||
PPC::SPILLTOVSR_ST \
|
||||
PPC::SPILL_VRSAVE, PPC::SPILLTOVSR_ST \
|
||||
}
|
||||
|
||||
// Initialize arrays for load and store spill opcodes on supported subtargets.
|
||||
|
@ -273,10 +266,10 @@ public:
|
|||
}
|
||||
|
||||
static bool isSameClassPhysRegCopy(unsigned Opcode) {
|
||||
unsigned CopyOpcodes[] =
|
||||
{ PPC::OR, PPC::OR8, PPC::FMR, PPC::VOR, PPC::XXLOR, PPC::XXLORf,
|
||||
PPC::XSCPSGNDP, PPC::MCRF, PPC::QVFMR, PPC::QVFMRs, PPC::QVFMRb,
|
||||
PPC::CROR, PPC::EVOR, -1U };
|
||||
unsigned CopyOpcodes[] = {PPC::OR, PPC::OR8, PPC::FMR,
|
||||
PPC::VOR, PPC::XXLOR, PPC::XXLORf,
|
||||
PPC::XSCPSGNDP, PPC::MCRF, PPC::CROR,
|
||||
PPC::EVOR, -1U};
|
||||
for (int i = 0; CopyOpcodes[i] != -1U; i++)
|
||||
if (Opcode == CopyOpcodes[i])
|
||||
return true;
|
||||
|
|
|
@ -203,16 +203,6 @@ def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>;
|
|||
def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>;
|
||||
def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
|
||||
|
||||
def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;
|
||||
def PPCqvgpci : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>;
|
||||
def PPCqvaligni : SDNode<"PPCISD::QVALIGNI", SDT_PPCqvaligni, []>;
|
||||
def PPCqvesplati : SDNode<"PPCISD::QVESPLATI", SDT_PPCqvesplati, []>;
|
||||
|
||||
def PPCqbflt : SDNode<"PPCISD::QBFLT", SDT_PPCqbflt, []>;
|
||||
|
||||
def PPCqvlfsb : SDNode<"PPCISD::QVLFSb", SDT_PPCqvlfsb,
|
||||
[SDNPHasChain, SDNPMayLoad]>;
|
||||
|
||||
def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>;
|
||||
|
||||
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
|
||||
|
@ -3461,7 +3451,6 @@ include "PPCInstrAltivec.td"
|
|||
include "PPCInstrSPE.td"
|
||||
include "PPCInstr64Bit.td"
|
||||
include "PPCInstrVSX.td"
|
||||
include "PPCInstrQPX.td"
|
||||
include "PPCInstrHTM.td"
|
||||
|
||||
def crnot : OutPatFrag<(ops node:$in),
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,161 +0,0 @@
|
|||
//===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The QPX vector registers overlay the scalar floating-point registers, and
|
||||
// any scalar floating-point loads splat their value across all vector lanes.
|
||||
// Thus, if we have a scalar load followed by a splat, we can remove the splat
|
||||
// (i.e. replace the load with a load-and-splat pseudo instruction).
|
||||
//
|
||||
// This pass must run after anything that might do store-to-load forwarding.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PPC.h"
|
||||
#include "PPCInstrBuilder.h"
|
||||
#include "PPCInstrInfo.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/TargetSubtargetInfo.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "ppc-qpx-load-splat"
|
||||
|
||||
STATISTIC(NumSimplified, "Number of QPX load splats simplified");
|
||||
|
||||
namespace {
|
||||
struct PPCQPXLoadSplat : public MachineFunctionPass {
|
||||
static char ID;
|
||||
PPCQPXLoadSplat() : MachineFunctionPass(ID) {
|
||||
initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &Fn) override;
|
||||
|
||||
StringRef getPassName() const override {
|
||||
return "PowerPC QPX Load Splat Simplification";
|
||||
}
|
||||
};
|
||||
char PPCQPXLoadSplat::ID = 0;
|
||||
}
|
||||
|
||||
// Registers PPCQPXLoadSplat under the argument name "ppc-qpx-load-splat".
// NOTE(review): the two trailing `false` flags are presumably the CFG-only and
// is-analysis markers of INITIALIZE_PASS -- confirm against PassSupport.h.
INITIALIZE_PASS(PPCQPXLoadSplat, "ppc-qpx-load-splat",
                "PowerPC QPX Load Splat Simplification", false, false)
|
||||
|
||||
/// Factory for the QPX load-splat simplification pass. Returns a freshly
/// heap-allocated PPCQPXLoadSplat as a FunctionPass pointer.
FunctionPass *llvm::createPPCQPXLoadSplatPass() {
  FunctionPass *NewPass = new PPCQPXLoadSplat();
  return NewPass;
}
|
||||
|
||||
/// Walks every basic block bottom-up, remembering lane-0 QPX splats whose
/// source operand is killed, and folds each one into the scalar FP load that
/// defines its source register.
///
/// \param MF the machine function to transform.
/// \returns true if at least one splat was removed.
bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();

  // Opcodes of the scalar floating-point loads that a splat can be folded
  // into.
  const auto IsScalarFPLoad = [](unsigned Opc) {
    switch (Opc) {
    case PPC::LFS:
    case PPC::LFD:
    case PPC::LFSU:
    case PPC::LFDU:
    case PPC::LFSUX:
    case PPC::LFDUX:
    case PPC::LFSX:
    case PPC::LFDX:
    case PPC::LFIWAX:
    case PPC::LFIWZX:
      return true;
    default:
      return false;
    }
  };

  // The three QVESPLATI variants that are candidates for removal.
  const auto IsSplatOpcode = [](unsigned Opc) {
    return Opc == PPC::QVESPLATI || Opc == PPC::QVESPLATIs ||
           Opc == PPC::QVESPLATIb;
  };

  bool Changed = false;

  for (MachineBasicBlock &Block : MF) {
    // Splats seen so far (below the current instruction) that are still
    // waiting for their defining load.
    SmallVector<MachineInstr *, 4> Pending;

    for (auto RI = Block.rbegin(); RI != Block.rend(); ++RI) {
      MachineInstr &Cur = *RI;

      // Do not carry candidates across calls or instructions with unmodeled
      // side effects.
      if (Cur.hasUnmodeledSideEffects() || Cur.isCall()) {
        Pending.clear();
        continue;
      }

      // The pattern being matched is:
      //   %f0 = LFD 0, killed %x3, implicit-def %qf0; mem:LD8[%a](tbaa=!2)
      //   %qf1 = QVESPLATI killed %qf0, 0, implicit %rm
      for (auto PI = Pending.begin(); PI != Pending.end();) {
        MachineInstr *Splat = *PI;
        const Register Dst = Splat->getOperand(0).getReg();
        const Register Src = Splat->getOperand(1).getReg();

        if (Cur.modifiesRegister(Src, TRI)) {
          // Whatever defines the splat source ends this candidate's search,
          // whether or not it turns out to be a foldable load.
          PI = Pending.erase(PI);

          if (!IsScalarFPLoad(Cur.getOpcode()))
            continue;

          if (Dst != Src) {
            // Retarget the load so it defines the scalar subregister of the
            // splat's destination instead of the splat's source.
            const unsigned SubIdx =
                TRI->getSubRegIndex(Src, Cur.getOperand(0).getReg());
            const Register DstSub = TRI->getSubReg(Dst, SubIdx);

            // Replace the explicit def and then the implicit def of the
            // containing QPX register.
            Cur.getOperand(0).setReg(DstSub);
            Cur.substituteRegister(Src, Dst, 0, *TRI);
          }

          // When the splat sits directly after Cur, RI's base iterator points
          // at the splat; step RI away while erasing so it is not
          // invalidated, then step back.
          ++RI;
          Splat->eraseFromParent();
          --RI;

          ++NumSimplified;
          Changed = true;
          continue;
        }

        // A def of the splat destination prevents hoisting the definition
        // above Cur. A read of it (when it differs from the source) means the
        // register is already live from an earlier def, so Cur cannot be the
        // load being searched for.
        const bool Blocked =
            Cur.modifiesRegister(Dst, TRI) ||
            (Src != Dst && Cur.readsRegister(Dst, TRI));
        if (Blocked)
          PI = Pending.erase(PI);
        else
          ++PI;
      }

      // Record Cur as a new candidate only if it is a splat of lane 0 whose
      // source is killed; other uses of the scalar value after the splat
      // would make the replacement non-trivial.
      const bool IsCandidate = IsSplatOpcode(Cur.getOpcode()) &&
                               Cur.getOperand(2).getImm() == 0 &&
                               Cur.getOperand(1).isKill();
      if (IsCandidate)
        Pending.push_back(&Cur);
    }
  }

  return Changed;
}
|
|
@ -404,9 +404,6 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
|
|||
}
|
||||
case PPC::F8RCRegClassID:
|
||||
case PPC::F4RCRegClassID:
|
||||
case PPC::QFRCRegClassID:
|
||||
case PPC::QSRCRegClassID:
|
||||
case PPC::QBRCRegClassID:
|
||||
case PPC::VRRCRegClassID:
|
||||
case PPC::VFRCRegClassID:
|
||||
case PPC::VSLRCRegClassID:
|
||||
|
|
|
@ -153,7 +153,6 @@ public:
|
|||
switch (RegName[0]) {
|
||||
case 'r':
|
||||
case 'f':
|
||||
case 'q': // for QPX
|
||||
case 'v':
|
||||
if (RegName[1] == 's')
|
||||
return RegName + 2;
|
||||
|
|
|
@ -54,13 +54,6 @@ class FPR<bits<5> num, string n> : PPCReg<n> {
|
|||
let HWEncoding{4-0} = num;
|
||||
}
|
||||
|
||||
// QFPR - One of the 32 256-bit floating-point vector registers (used for QPX)
|
||||
class QFPR<FPR SubReg, string n> : PPCReg<n> {
|
||||
let HWEncoding = SubReg.HWEncoding;
|
||||
let SubRegs = [SubReg];
|
||||
let SubRegIndices = [sub_64];
|
||||
}
|
||||
|
||||
// VF - One of the 32 64-bit floating-point subregisters of the vector
|
||||
// registers (used by VSX).
|
||||
class VF<bits<5> num, string n> : PPCReg<n> {
|
||||
|
@ -132,12 +125,6 @@ foreach Index = 0-31 in {
|
|||
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
|
||||
}
|
||||
|
||||
// QPX Floating-point registers
|
||||
foreach Index = 0-31 in {
|
||||
def QF#Index : QFPR<!cast<FPR>("F"#Index), "q"#Index>,
|
||||
DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
|
||||
}
|
||||
|
||||
// Vector registers
|
||||
foreach Index = 0-31 in {
|
||||
def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
|
||||
|
@ -343,16 +330,6 @@ def SPILLTOVSRRC : RegisterClass<"PPC", [i64, f64], 64, (add G8RC, (sub VSFRC,
|
|||
// Register class for single precision scalars in VSX registers
|
||||
def VSSRC : RegisterClass<"PPC", [f32], 32, (add VSFRC)>;
|
||||
|
||||
// For QPX
|
||||
def QFRC : RegisterClass<"PPC", [v4f64], 256, (add (sequence "QF%u", 0, 13),
|
||||
(sequence "QF%u", 31, 14))>;
|
||||
def QSRC : RegisterClass<"PPC", [v4f32], 128, (add QFRC)>;
|
||||
def QBRC : RegisterClass<"PPC", [v4i1], 256, (add QFRC)> {
|
||||
// These are actually stored as floating-point values where a positive
|
||||
// number is true and anything else (including NaN) is false.
|
||||
let Size = 256;
|
||||
}
|
||||
|
||||
def CRBITRC : RegisterClass<"PPC", [i1], 32,
|
||||
(add CR2LT, CR2GT, CR2EQ, CR2UN,
|
||||
CR3LT, CR3GT, CR3EQ, CR3UN,
|
||||
|
|
|
@ -40,12 +40,9 @@ def P9Model : SchedMachineModel {
|
|||
|
||||
let CompleteModel = 1;
|
||||
|
||||
// Do not support QPX (Quad Processing eXtension), SPE (Signal Processing
|
||||
// Engine), prefixed instructions on Power 9, PC relative mem ops, or
|
||||
// instructions introduced in ISA 3.1.
|
||||
let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs, PCRelativeMemops,
|
||||
IsISA3_1];
|
||||
|
||||
// Do not support SPE (Signal Processing Engine), prefixed instructions on
|
||||
// Power 9, PC relative mem ops, or instructions introduced in ISA 3.1.
|
||||
let UnsupportedFeatures = [HasSPE, PrefixInstrs, PCRelativeMemops, IsISA3_1];
|
||||
}
|
||||
|
||||
let SchedModel = P9Model in {
|
||||
|
|
|
@ -35,10 +35,6 @@ using namespace llvm;
|
|||
static cl::opt<bool> UseSubRegLiveness("ppc-track-subreg-liveness",
|
||||
cl::desc("Enable subregister liveness tracking for PPC"), cl::Hidden);
|
||||
|
||||
static cl::opt<bool> QPXStackUnaligned("qpx-stack-unaligned",
|
||||
cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"),
|
||||
cl::Hidden);
|
||||
|
||||
static cl::opt<bool>
|
||||
EnableMachinePipeliner("ppc-enable-pipeliner",
|
||||
cl::desc("Enable Machine Pipeliner for PPC"),
|
||||
|
@ -70,7 +66,6 @@ void PPCSubtarget::initializeEnvironment() {
|
|||
HasAltivec = false;
|
||||
HasSPE = false;
|
||||
HasFPU = false;
|
||||
HasQPX = false;
|
||||
HasVSX = false;
|
||||
NeedsTwoConstNR = false;
|
||||
HasP8Vector = false;
|
||||
|
@ -109,7 +104,6 @@ void PPCSubtarget::initializeEnvironment() {
|
|||
HasInvariantFunctionDescriptors = false;
|
||||
HasPartwordAtomics = false;
|
||||
HasDirectMove = false;
|
||||
IsQPXStackUnaligned = false;
|
||||
HasHTM = false;
|
||||
HasFloat128 = false;
|
||||
HasFusion = false;
|
||||
|
@ -158,7 +152,7 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
|
|||
|
||||
if (HasSPE && IsPPC64)
|
||||
report_fatal_error( "SPE is only supported for 32-bit targets.\n", false);
|
||||
if (HasSPE && (HasAltivec || HasQPX || HasVSX || HasFPU))
|
||||
if (HasSPE && (HasAltivec || HasVSX || HasFPU))
|
||||
report_fatal_error(
|
||||
"SPE and traditional floating point cannot both be enabled.\n", false);
|
||||
|
||||
|
@ -166,10 +160,6 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
|
|||
if (!HasSPE)
|
||||
HasFPU = true;
|
||||
|
||||
// QPX requires a 32-byte aligned stack. Note that we need to do this if
|
||||
// we're compiling for a BG/Q system regardless of whether or not QPX
|
||||
// is enabled because external functions will assume this alignment.
|
||||
IsQPXStackUnaligned = QPXStackUnaligned;
|
||||
StackAlignment = getPlatformStackAlignment();
|
||||
|
||||
// Determine endianness.
|
||||
|
|
|
@ -97,7 +97,6 @@ protected:
|
|||
bool HasAltivec;
|
||||
bool HasFPU;
|
||||
bool HasSPE;
|
||||
bool HasQPX;
|
||||
bool HasVSX;
|
||||
bool NeedsTwoConstNR;
|
||||
bool HasP8Vector;
|
||||
|
@ -150,11 +149,6 @@ protected:
|
|||
|
||||
POPCNTDKind HasPOPCNTD;
|
||||
|
||||
/// When targeting QPX running a stock PPC64 Linux kernel where the stack
|
||||
/// alignment has not been changed, we need to keep the 16-byte alignment
|
||||
/// of the stack.
|
||||
bool IsQPXStackUnaligned;
|
||||
|
||||
const PPCTargetMachine &TM;
|
||||
PPCFrameLowering FrameLowering;
|
||||
PPCInstrInfo InstrInfo;
|
||||
|
@ -255,7 +249,6 @@ public:
|
|||
bool hasAltivec() const { return HasAltivec; }
|
||||
bool hasSPE() const { return HasSPE; }
|
||||
bool hasFPU() const { return HasFPU; }
|
||||
bool hasQPX() const { return HasQPX; }
|
||||
bool hasVSX() const { return HasVSX; }
|
||||
bool needsTwoConstNR() const { return NeedsTwoConstNR; }
|
||||
bool hasP8Vector() const { return HasP8Vector; }
|
||||
|
@ -291,11 +284,7 @@ public:
|
|||
bool hasPartwordAtomics() const { return HasPartwordAtomics; }
|
||||
bool hasDirectMove() const { return HasDirectMove; }
|
||||
|
||||
bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
|
||||
Align getPlatformStackAlignment() const {
|
||||
if ((hasQPX() || isBGQ()) && !isQPXStackUnaligned())
|
||||
return Align(32);
|
||||
|
||||
return Align(16);
|
||||
}
|
||||
|
||||
|
@ -325,9 +314,6 @@ public:
|
|||
|
||||
const Triple &getTargetTriple() const { return TargetTriple; }
|
||||
|
||||
/// isBGQ - True if this is a BG/Q platform.
|
||||
bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
|
||||
|
||||
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
|
||||
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
|
||||
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
|
||||
|
|
|
@ -63,10 +63,6 @@ static cl::
|
|||
opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
|
||||
cl::desc("Disable VSX Swap Removal for PPC"));
|
||||
|
||||
static cl::
|
||||
opt<bool> DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden,
|
||||
cl::desc("Disable QPX load splat simplification"));
|
||||
|
||||
static cl::
|
||||
opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
|
||||
cl::desc("Disable machine peepholes for PPC"));
|
||||
|
@ -114,7 +110,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() {
|
|||
initializePPCReduceCRLogicalsPass(PR);
|
||||
initializePPCBSelPass(PR);
|
||||
initializePPCBranchCoalescingPass(PR);
|
||||
initializePPCQPXLoadSplatPass(PR);
|
||||
initializePPCBoolRetToIntPass(PR);
|
||||
initializePPCExpandISELPass(PR);
|
||||
initializePPCPreEmitPeepholePass(PR);
|
||||
|
@ -411,14 +406,9 @@ void PPCPassConfig::addIRPasses() {
|
|||
|
||||
// Lower generic MASSV routines to PowerPC subtarget-specific entries.
|
||||
addPass(createPPCLowerMASSVEntriesPass());
|
||||
|
||||
// For the BG/Q (or if explicitly requested), add explicit data prefetch
|
||||
// intrinsics.
|
||||
bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
|
||||
getOptLevel() != CodeGenOpt::None;
|
||||
|
||||
// If explicitly requested, add explicit data prefetch intrinsics.
|
||||
if (EnablePrefetch.getNumOccurrences() > 0)
|
||||
UsePrefetching = EnablePrefetch;
|
||||
if (UsePrefetching)
|
||||
addPass(createLoopDataPrefetchPass());
|
||||
|
||||
if (TM->getOptLevel() >= CodeGenOpt::Default && EnableGEPOpt) {
|
||||
|
@ -515,15 +505,8 @@ void PPCPassConfig::addPreRegAlloc() {
|
|||
}
|
||||
|
||||
void PPCPassConfig::addPreSched2() {
|
||||
if (getOptLevel() != CodeGenOpt::None) {
|
||||
if (getOptLevel() != CodeGenOpt::None)
|
||||
addPass(&IfConverterID);
|
||||
|
||||
// This optimization must happen after anything that might do store-to-load
|
||||
// forwarding. Here we're after RA (and, thus, when spills are inserted)
|
||||
// but before post-RA scheduling.
|
||||
if (!DisableQPXLoadSplat)
|
||||
addPass(createPPCQPXLoadSplatPass());
|
||||
}
|
||||
}
|
||||
|
||||
void PPCPassConfig::addPreEmitPass() {
|
||||
|
|
|
@ -25,8 +25,7 @@ using namespace llvm;
|
|||
static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
|
||||
cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
|
||||
|
||||
// This is currently only used for the data prefetch pass which is only enabled
|
||||
// for BG/Q by default.
|
||||
// This is currently only used for the data prefetch pass
|
||||
static cl::opt<unsigned>
|
||||
CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64),
|
||||
cl::desc("The loop prefetch cache line size"));
|
||||
|
@ -104,55 +103,6 @@ PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
|
|||
Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
|
||||
return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1));
|
||||
}
|
||||
case Intrinsic::ppc_qpx_qvlfs:
|
||||
// Turn PPC QPX qvlfs -> load if the pointer is known aligned.
|
||||
if (getOrEnforceKnownAlignment(
|
||||
II.getArgOperand(0), Align(16), IC.getDataLayout(), &II,
|
||||
&IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
|
||||
Type *VTy =
|
||||
VectorType::get(IC.Builder.getFloatTy(),
|
||||
cast<VectorType>(II.getType())->getElementCount());
|
||||
Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(0),
|
||||
PointerType::getUnqual(VTy));
|
||||
Value *Load = IC.Builder.CreateLoad(VTy, Ptr);
|
||||
return new FPExtInst(Load, II.getType());
|
||||
}
|
||||
break;
|
||||
case Intrinsic::ppc_qpx_qvlfd:
|
||||
// Turn PPC QPX qvlfd -> load if the pointer is known aligned.
|
||||
if (getOrEnforceKnownAlignment(
|
||||
II.getArgOperand(0), Align(32), IC.getDataLayout(), &II,
|
||||
&IC.getAssumptionCache(), &IC.getDominatorTree()) >= 32) {
|
||||
Value *Ptr = IC.Builder.CreateBitCast(
|
||||
II.getArgOperand(0), PointerType::getUnqual(II.getType()));
|
||||
return new LoadInst(II.getType(), Ptr, "", false, Align(32));
|
||||
}
|
||||
break;
|
||||
case Intrinsic::ppc_qpx_qvstfs:
|
||||
// Turn PPC QPX qvstfs -> store if the pointer is known aligned.
|
||||
if (getOrEnforceKnownAlignment(
|
||||
II.getArgOperand(1), Align(16), IC.getDataLayout(), &II,
|
||||
&IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
|
||||
Type *VTy = VectorType::get(
|
||||
IC.Builder.getFloatTy(),
|
||||
cast<VectorType>(II.getArgOperand(0)->getType())->getElementCount());
|
||||
Value *TOp = IC.Builder.CreateFPTrunc(II.getArgOperand(0), VTy);
|
||||
Type *OpPtrTy = PointerType::getUnqual(VTy);
|
||||
Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
|
||||
return new StoreInst(TOp, Ptr, false, Align(16));
|
||||
}
|
||||
break;
|
||||
case Intrinsic::ppc_qpx_qvstfd:
|
||||
// Turn PPC QPX qvstfd -> store if the pointer is known aligned.
|
||||
if (getOrEnforceKnownAlignment(
|
||||
II.getArgOperand(1), Align(32), IC.getDataLayout(), &II,
|
||||
&IC.getAssumptionCache(), &IC.getDominatorTree()) >= 32) {
|
||||
Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
|
||||
Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
|
||||
return new StoreInst(II.getArgOperand(0), Ptr, false, Align(32));
|
||||
}
|
||||
break;
|
||||
|
||||
case Intrinsic::ppc_altivec_vperm:
|
||||
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
|
||||
// Note that ppc_altivec_vperm has a big-endian bias, so when creating
|
||||
|
@ -736,10 +686,7 @@ bool PPCTTIImpl::useColdCCForColdCall(Function &F) {
|
|||
}
|
||||
|
||||
bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
|
||||
// On the A2, always unroll aggressively. For QPX unaligned loads, we depend
|
||||
// on combining the loads generated for consecutive accesses, and failure to
|
||||
// do so is particularly expensive. This makes it much more likely (compared
|
||||
// to only using concatenation unrolling).
|
||||
// On the A2, always unroll aggressively.
|
||||
if (ST->getCPUDirective() == PPC::DIR_A2)
|
||||
return true;
|
||||
|
||||
|
@ -799,7 +746,6 @@ const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const {
|
|||
|
||||
unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const {
|
||||
if (Vector) {
|
||||
if (ST->hasQPX()) return 256;
|
||||
if (ST->hasAltivec()) return 128;
|
||||
return 0;
|
||||
}
|
||||
|
@ -828,8 +774,6 @@ unsigned PPCTTIImpl::getCacheLineSize() const {
|
|||
}
|
||||
|
||||
unsigned PPCTTIImpl::getPrefetchDistance() const {
|
||||
// This seems like a reasonable default for the BG/Q (this pass is enabled, by
|
||||
// default, only on the BG/Q).
|
||||
return 300;
|
||||
}
|
||||
|
||||
|
@ -918,7 +862,7 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
|
|||
// Legalize the type.
|
||||
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
|
||||
|
||||
// PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
|
||||
// PPC, for both Altivec/VSX, support cheap arbitrary permutations
|
||||
// (at least in the sense that there need only be one non-loop-invariant
|
||||
// instruction). We need one such shuffle instruction for each actual
|
||||
// register (this is not true for arbitrary shuffles, but is true for the
|
||||
|
@ -974,13 +918,6 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
|
|||
|
||||
return Cost;
|
||||
|
||||
} else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
|
||||
// Floating point scalars are already located in index #0.
|
||||
if (Index == 0)
|
||||
return 0;
|
||||
|
||||
return Cost;
|
||||
|
||||
} else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
|
||||
if (ST->hasP9Altivec()) {
|
||||
if (ISD == ISD::INSERT_VECTOR_ELT)
|
||||
|
@ -1055,8 +992,6 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
|||
LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
|
||||
bool IsVSXType = ST->hasVSX() &&
|
||||
(LT.second == MVT::v2f64 || LT.second == MVT::v2i64);
|
||||
bool IsQPXType = ST->hasQPX() &&
|
||||
(LT.second == MVT::v4f64 || LT.second == MVT::v4f32);
|
||||
|
||||
// VSX has 32b/64b load instructions. Legalization can handle loading of
|
||||
// 32b/64b to VSR correctly and cheaply. But BaseT::getMemoryOpCost and
|
||||
|
@ -1079,8 +1014,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
|||
// for Altivec types using the VSX instructions, but that's more expensive
|
||||
// than using the permutation-based load sequence. On the P8, that's no
|
||||
// longer true.
|
||||
if (Opcode == Instruction::Load &&
|
||||
((!ST->hasP8Vector() && IsAltivecType) || IsQPXType) &&
|
||||
if (Opcode == Instruction::Load && (!ST->hasP8Vector() && IsAltivecType) &&
|
||||
*Alignment >= LT.second.getScalarType().getStoreSize())
|
||||
return Cost + LT.first; // Add the cost of the permutations.
|
||||
|
||||
|
@ -1133,7 +1067,7 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(
|
|||
getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
|
||||
CostKind);
|
||||
|
||||
// PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
|
||||
// PPC, for both Altivec/VSX, support cheap arbitrary permutations
|
||||
// (at least in the sense that there need only be one non-loop-invariant
|
||||
// instruction). For each result vector, we need one shuffle per incoming
|
||||
// vector (except that the first shuffle can take two incoming vectors
|
||||
|
|
|
@ -4751,15 +4751,14 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
|
|||
// For PowerPC, we need to deal with alignment of stack arguments -
|
||||
// they are mostly aligned to 8 bytes, but vectors and i128 arrays
|
||||
// are aligned to 16 bytes, byvals can be aligned to 8 or 16 bytes,
|
||||
// and QPX vectors are aligned to 32 bytes. For that reason, we
|
||||
// compute current offset from stack pointer (which is always properly
|
||||
// aligned), and offset for the first vararg, then subtract them.
|
||||
// For that reason, we compute current offset from stack pointer (which is
|
||||
// always properly aligned), and offset for the first vararg, then subtract
|
||||
// them.
|
||||
unsigned VAArgBase;
|
||||
Triple TargetTriple(F.getParent()->getTargetTriple());
|
||||
// Parameter save area starts at 48 bytes from frame pointer for ABIv1,
|
||||
// and 32 bytes for ABIv2. This is usually determined by target
|
||||
// endianness, but in theory could be overridden by function attribute.
|
||||
// For simplicity, we ignore it here (it'd only matter for QPX vectors).
|
||||
if (TargetTriple.getArch() == Triple::ppc64)
|
||||
VAArgBase = 48;
|
||||
else
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
target triple = "powerpc64le-unknown-linux"
|
||||
; RUN: opt < %s -basic-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
|
||||
|
||||
@X = external global [16000 x double], align 32
|
||||
|
|
|
@ -218,42 +218,6 @@ entry:
|
|||
; CHECK: cost of 2 for instruction: %r = load <4 x double>, <4 x double>* %p, align 8
|
||||
}
|
||||
|
||||
define <4 x float> @test_l_qv4float(<4 x float>* %p) #1 {
|
||||
entry:
|
||||
%r = load <4 x float>, <4 x float>* %p, align 4
|
||||
ret <4 x float> %r
|
||||
|
||||
; CHECK-LABEL: test_l_qv4float
|
||||
; CHECK: cost of 2 for instruction: %r = load <4 x float>, <4 x float>* %p, align 4
|
||||
}
|
||||
|
||||
define <8 x float> @test_l_qv8float(<8 x float>* %p) #1 {
|
||||
entry:
|
||||
%r = load <8 x float>, <8 x float>* %p, align 4
|
||||
ret <8 x float> %r
|
||||
|
||||
; CHECK-LABEL: test_l_qv8float
|
||||
; CHECK: cost of 4 for instruction: %r = load <8 x float>, <8 x float>* %p, align 4
|
||||
}
|
||||
|
||||
define <4 x double> @test_l_qv4double(<4 x double>* %p) #1 {
|
||||
entry:
|
||||
%r = load <4 x double>, <4 x double>* %p, align 8
|
||||
ret <4 x double> %r
|
||||
|
||||
; CHECK-LABEL: test_l_qv4double
|
||||
; CHECK: cost of 2 for instruction: %r = load <4 x double>, <4 x double>* %p, align 8
|
||||
}
|
||||
|
||||
define <8 x double> @test_l_qv8double(<8 x double>* %p) #1 {
|
||||
entry:
|
||||
%r = load <8 x double>, <8 x double>* %p, align 8
|
||||
ret <8 x double> %r
|
||||
|
||||
; CHECK-LABEL: test_l_qv8double
|
||||
; CHECK: cost of 4 for instruction: %r = load <8 x double>, <8 x double>* %p, align 8
|
||||
}
|
||||
|
||||
define void @test_s_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 {
|
||||
entry:
|
||||
store <16 x i8> %v, <16 x i8>* %p, align 1
|
||||
|
@ -362,43 +326,6 @@ entry:
|
|||
; CHECK: cost of 2 for instruction: store <4 x double> %v, <4 x double>* %p, align 8
|
||||
}
|
||||
|
||||
define void @test_s_qv4float(<4 x float>* %p, <4 x float> %v) #1 {
|
||||
entry:
|
||||
store <4 x float> %v, <4 x float>* %p, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test_s_qv4float
|
||||
; CHECK: cost of 7 for instruction: store <4 x float> %v, <4 x float>* %p, align 4
|
||||
}
|
||||
|
||||
define void @test_s_qv8float(<8 x float>* %p, <8 x float> %v) #1 {
|
||||
entry:
|
||||
store <8 x float> %v, <8 x float>* %p, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test_s_qv8float
|
||||
; CHECK: cost of 15 for instruction: store <8 x float> %v, <8 x float>* %p, align 4
|
||||
}
|
||||
|
||||
define void @test_s_qv4double(<4 x double>* %p, <4 x double> %v) #1 {
|
||||
entry:
|
||||
store <4 x double> %v, <4 x double>* %p, align 8
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test_s_qv4double
|
||||
; CHECK: cost of 7 for instruction: store <4 x double> %v, <4 x double>* %p, align 8
|
||||
}
|
||||
|
||||
define void @test_s_qv8double(<8 x double>* %p, <8 x double> %v) #1 {
|
||||
entry:
|
||||
store <8 x double> %v, <8 x double>* %p, align 8
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test_s_qv8double
|
||||
; CHECK: cost of 15 for instruction: store <8 x double> %v, <8 x double>* %p, align 8
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "target-cpu"="pwr7" }
|
||||
attributes #1 = { nounwind "target-cpu"="a2q" }
|
||||
attributes #2 = { nounwind "target-cpu"="pwr8" }
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-bgq-linux -enable-misched < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux -enable-misched < %s | FileCheck %s
|
||||
;
|
||||
; PR14315: misched should not move the physreg copy of %t below the calls.
|
||||
|
||||
|
|
|
@ -55,7 +55,7 @@
|
|||
; Function Attrs: nounwind
|
||||
declare void @llvm.stackprotector(i8*, i8**) #1
|
||||
|
||||
attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
!llvm.module.flags = !{!0, !1}
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
; Function Attrs: nounwind
|
||||
declare void @llvm.stackprotector(i8*, i8**) #1
|
||||
|
||||
attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
!llvm.ident = !{!0}
|
||||
|
|
|
@ -1,23 +0,0 @@
|
|||
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc64-- -mcpu=a2 | FileCheck -check-prefix=CHECK-A2 %s
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc64-- -mcpu=a2q | FileCheck -check-prefix=CHECK-A2Q %s
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-bgq-linux -mcpu=a2 | FileCheck -check-prefix=CHECK-BGQ %s
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
declare i32 @bar(i8* %a) nounwind;
|
||||
define i32 @foo() nounwind {
|
||||
%p = alloca i8, i8 115
|
||||
store i8 0, i8* %p
|
||||
%r = call i32 @bar(i8* %p)
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
; Without QPX, the allocated stack frame is 240 bytes, but with QPX
|
||||
; (because we require 32-byte alignment), it is 256 bytes.
|
||||
; CHECK-A2: @foo
|
||||
; CHECK-A2: stdu 1, -240(1)
|
||||
; CHECK-A2Q: @foo
|
||||
; CHECK-A2Q: stdu 1, -256(1)
|
||||
; CHECK-BGQ: @foo
|
||||
; CHECK-BGQ: stdu 1, -256(1)
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc64-- -mcpu=a2q | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc64-- -mcpu=a2 -mattr=+qpx | FileCheck %s
|
||||
|
||||
define void @foo() {
|
||||
entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @foo
|
||||
|
|
@ -298,7 +298,7 @@ _ZN10SubProcess12SafeSyscalls5fcntlEiil.exit: ; preds = %_ZN10SubProcess12Sa
|
|||
; Function Attrs: nounwind argmemonly
|
||||
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #1
|
||||
|
||||
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind argmemonly }
|
||||
attributes #2 = { nounwind }
|
||||
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
; RUN: llc -verify-machineinstrs < %s -mcpu=a2 | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs < %s -mcpu=a2 -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
|
||||
define i32 @zytest(i32 %a) nounwind {
|
||||
entry:
|
||||
|
|
|
@ -65,7 +65,7 @@ entry:
|
|||
}
|
||||
|
||||
|
||||
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
|
|
|
@ -63,8 +63,8 @@
|
|||
ret i64 %2
|
||||
}
|
||||
|
||||
attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
!llvm.module.flags = !{!0, !1}
|
||||
!llvm.ident = !{!2}
|
||||
|
|
|
@ -187,7 +187,7 @@
|
|||
ret i64 %cond
|
||||
}
|
||||
|
||||
attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
!llvm.module.flags = !{!0, !1}
|
||||
!llvm.ident = !{!2}
|
||||
|
|
|
@ -983,10 +983,10 @@
|
|||
ret i64 %xor
|
||||
}
|
||||
|
||||
attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #2 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,-vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #3 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #2 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,-vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #3 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
!llvm.module.flags = !{!0, !1}
|
||||
!llvm.ident = !{!2}
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs -mcpu=a2q < %s | FileCheck %s --check-prefix=QPX
|
||||
|
||||
declare float @fabsf(float)
|
||||
|
||||
|
@ -64,11 +63,6 @@ loop_exit:
|
|||
; CHECK-NOT: xsmindp
|
||||
; CHECK: blr
|
||||
|
||||
; QPX-LABEL: test1v:
|
||||
; QPX: mtctr
|
||||
; QPX-NOT: bl fminf
|
||||
; QPX: blr
|
||||
|
||||
define void @test1a(float %f, float* %fp) {
|
||||
entry:
|
||||
br label %loop_body
|
||||
|
@ -139,11 +133,6 @@ loop_exit:
|
|||
; CHECK-NOT: xsmaxdp
|
||||
; CHECK: blr
|
||||
|
||||
; QPX-LABEL: test2v:
|
||||
; QPX: mtctr
|
||||
; QPX-NOT: bl fmax
|
||||
; QPX: blr
|
||||
|
||||
define void @test2a(float %f, float* %fp) {
|
||||
entry:
|
||||
br label %loop_body
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -mcpu=pwr8 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR8
|
||||
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -mcpu=a2q | FileCheck %s --check-prefixes=CHECK,CHECK-A2Q
|
||||
|
||||
; Verify that we do NOT generate the mtctr instruction for loop trip counts < 4
|
||||
; The latency of the mtctr is only justified if there are more than 4 comparisons that are removed as a result.
|
||||
|
@ -86,11 +85,8 @@ for.body: ; preds = %entry, %for.body
|
|||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
; On core a2q, IssueWidth is 1. On core pwr8, IssueWidth is 8.
|
||||
; a2q should use mtctr, but pwr8 should not use mtctr.
|
||||
define signext i32 @testTripCount2NonSmallLoop() {
|
||||
; CHECK-LABEL: testTripCount2NonSmallLoop:
|
||||
; CHECK-A2Q: mtctr
|
||||
; CHECK-PWR8-NOT: mtctr
|
||||
; CHECK: blr
|
||||
|
||||
|
@ -121,12 +117,9 @@ for.end: ; preds = %if.end
|
|||
ret i32 %conv
|
||||
}
|
||||
|
||||
; On core a2q, IssueWidth is 1. On core pwr8, IssueWidth is 8.
|
||||
; a2q should use mtctr, but pwr8 should not use mtctr.
|
||||
define signext i32 @testTripCount5() {
|
||||
; CHECK-LABEL: testTripCount5:
|
||||
; CHECK-PWR8-NOT: mtctr
|
||||
; CHECK-A2Q: mtctr
|
||||
|
||||
entry:
|
||||
%.prea = load i32, i32* @a, align 4
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
; that were both inputs to the inline asm and also early-clobber outputs).
|
||||
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
target triple = "powerpc64le-unknown-linux"
|
||||
|
||||
%struct._IO_FILE.119.8249.32639.195239.200117.211499.218003.221255.222881.224507.226133.240767.244019.245645.248897.260279.271661.281417.283043.302555.304181.325319.326945.344713 = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker.118.8248.32638.195238.200116.211498.218002.221254.222880.224506.226132.240766.244018.245644.248896.260278.271660.281416.283042.302554.304180.325318.326944.344712*, %struct._IO_FILE.119.8249.32639.195239.200117.211499.218003.221255.222881.224507.226133.240767.244019.245645.248897.260279.271661.281417.283043.302555.304181.325319.326945.344713*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
|
||||
%struct._IO_marker.118.8248.32638.195238.200116.211498.218002.221254.222880.224506.226132.240766.244018.245644.248896.260278.271660.281416.283042.302554.304180.325318.326944.344712 = type { %struct._IO_marker.118.8248.32638.195238.200116.211498.218002.221254.222880.224506.226132.240766.244018.245644.248896.260278.271660.281416.283042.302554.304180.325318.326944.344712*, %struct._IO_FILE.119.8249.32639.195239.200117.211499.218003.221255.222881.224507.226133.240767.244019.245645.248897.260279.271661.281417.283043.302555.304181.325319.326945.344713*, i32 }
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr8 < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
target triple = "powerpc64-unknown-linux"
|
||||
|
||||
%"class.Foam::messageStream.6" = type <{ %"class.Foam::string.5", i32, i32, i32, [4 x i8] }>
|
||||
%"class.Foam::string.5" = type { %"class.std::basic_string.4" }
|
||||
|
@ -419,8 +419,8 @@ declare void @_ZN4Foam11regIOobjectD2Ev() #0
|
|||
|
||||
declare void @_ZN4Foam6reduceIiNS_5sumOpIiEEEEvRKNS_4ListINS_8UPstream11commsStructEEERT_RKT0_ii() #0
|
||||
|
||||
attributes #0 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="a2q" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { inlinehint "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="a2q" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #0 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { inlinehint "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -verify-machineinstrs -O0 -relocation-model=pic < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
target triple = "powerpc64le-unknown-linux"
|
||||
|
||||
%"class.std::__1::__tree_node.130.151" = type { %"class.std::__1::__tree_node_base.base.128.149", %"class.boost::serialization::extended_type_info.129.150"* }
|
||||
%"class.std::__1::__tree_node_base.base.128.149" = type <{ %"class.std::__1::__tree_end_node.127.148", %"class.std::__1::__tree_node_base.126.147"*, %"class.std::__1::__tree_node_base.126.147"*, i8 }>
|
||||
|
|
|
@ -33,4 +33,4 @@ define float @f(float %xf) #0 {
|
|||
ret float %25
|
||||
}
|
||||
|
||||
attributes #0 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #0 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
; RUN: llc -verify-machineinstrs -mcpu=a2 < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mcpu=a2 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
|
||||
define linkonce_odr double @test1(ppc_fp128 %input) {
|
||||
entry:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-bgq-linux -mcpu=a2 < %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux -mcpu=a2 < %s
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
target triple = "powerpc64le-unknown-linux"
|
||||
|
||||
%"class.std::__1::__assoc_sub_state" = type { %"class.std::__1::__shared_count", %"class.std::__exception_ptr::exception_ptr", %"class.std::__1::mutex", %"class.std::__1::condition_variable", i32 }
|
||||
%"class.std::__1::__shared_count" = type { i32 (...)**, i64 }
|
||||
|
|
|
@ -33,5 +33,5 @@ declare i8* @_ZN11__sanitizer21internal_start_threadEPFvPvES0_(void (i8*)*, i8*)
|
|||
|
||||
declare hidden void @_ZN11__sanitizer16BackgroundThreadEPv(i8* nocapture readnone) #5
|
||||
|
||||
attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #7 = { nobuiltin nounwind }
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-bgq-linux -mcpu=a2 < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux -mcpu=a2 < %s | FileCheck %s
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
target triple = "powerpc64le-unknown-linux"
|
||||
|
||||
%struct.BG_CoordinateMapping_t = type { [4 x i8] }
|
||||
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
|
||||
define void @_Z4testSt7complexIfE(float %v0, float %v1, i64* %ref.tmp, float* %_M_value.realp.i.i, float* %_M_value.imagp.i.i) {
|
||||
entry:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -enable-ppc-prefetching=true -verify-machineinstrs < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
target triple = "powerpc64le-unknown-linux"
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define void @foo(double* %x, double* nocapture readonly %y) #0 {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -verify-machineinstrs -mcpu=a2 < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -enable-ppc-prefetching=true -mcpu=a2 < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
target triple = "powerpc64le-unknown-linux"
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define void @foo(double* nocapture %a, double* nocapture readonly %b) #0 {
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-bgq-linux < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BGQ
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
|
@ -21,7 +20,6 @@ for.body: ; preds = %for.body, %entry
|
|||
|
||||
; CHECK-LABEL: @foo
|
||||
|
||||
; CHECK-BGQ-DAG: dcbt 4, 5
|
||||
; CHECK-DAG: lfdu [[REG1:[0-9]+]], 8({{[0-9]+}})
|
||||
; CHECK-DAG: fadd [[REG2:[0-9]+]], [[REG1]], 0
|
||||
; CHECK-DAG: stfdu [[REG2]], 8({{[0-9]+}})
|
||||
|
@ -34,15 +32,13 @@ for.cond.cleanup6: ; preds = %for.body7
|
|||
|
||||
for.body7: ; preds = %for.body, %for.body7
|
||||
%i3.017 = phi i32 [ %inc9, %for.body7 ], [ 0, %for.body ]
|
||||
tail call void bitcast (void (...)* @bar to void ()*)() #2
|
||||
tail call void bitcast (void (...)* @bar to void ()*)() #0
|
||||
%inc9 = add nuw nsw i32 %i3.017, 1
|
||||
%exitcond = icmp eq i32 %inc9, 1024
|
||||
br i1 %exitcond, label %for.cond.cleanup6, label %for.body7
|
||||
}
|
||||
|
||||
declare void @bar(...) #1
|
||||
declare void @bar(...)
|
||||
|
||||
attributes #0 = { nounwind "target-cpu"="a2q" }
|
||||
attributes #1 = { "target-cpu"="a2q" }
|
||||
attributes #2 = { nounwind }
|
||||
attributes #0 = { nounwind }
|
||||
|
||||
|
|
|
@ -41,6 +41,6 @@ define void @aligned_slot() #0 {
|
|||
; Function Attrs: argmemonly nounwind
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
|
||||
|
||||
attributes #0 = { nounwind "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #0 = { nounwind "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { argmemonly nounwind }
|
||||
attributes #2 = { nounwind }
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr7 < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-PWR
|
||||
; RUN: llc -verify-machineinstrs -O3 -mcpu=a2q < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-QPX
|
||||
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 < %s | FileCheck %s -check-prefix=FIXPOINT
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
@ -93,9 +92,6 @@ define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, floa
|
|||
define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
|
||||
; CHECK-LABEL: vector_reassociate_adds1:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-QPX: qvfadds [[REG0:[0-9]+]], 1, 2
|
||||
; CHECK-QPX: qvfadds [[REG1:[0-9]+]], 3, 4
|
||||
; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
|
||||
; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
|
||||
; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
|
||||
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
|
||||
|
@ -110,9 +106,6 @@ define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <
|
|||
define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
|
||||
; CHECK-LABEL: vector_reassociate_adds2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-QPX: qvfadds [[REG0:[0-9]+]], 1, 2
|
||||
; CHECK-QPX: qvfadds [[REG1:[0-9]+]], 3, 4
|
||||
; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
|
||||
; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
|
||||
; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
|
||||
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
|
||||
|
@ -127,9 +120,6 @@ define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <
|
|||
define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
|
||||
; CHECK-LABEL: vector_reassociate_adds3:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-QPX: qvfadds [[REG0:[0-9]+]], 1, 2
|
||||
; CHECK-QPX: qvfadds [[REG1:[0-9]+]], 3, 4
|
||||
; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
|
||||
; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
|
||||
; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
|
||||
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
|
||||
|
@ -144,9 +134,6 @@ define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <
|
|||
define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
|
||||
; CHECK-LABEL: vector_reassociate_adds4:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-QPX: qvfadds [[REG0:[0-9]+]], 1, 2
|
||||
; CHECK-QPX: qvfadds [[REG1:[0-9]+]], 3, 4
|
||||
; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
|
||||
; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
|
||||
; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
|
||||
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
|
||||
|
@ -217,9 +204,6 @@ define i64 @reassociate_mulld(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
|
|||
define double @reassociate_mamaa_double(double %0, double %1, double %2, double %3, double %4, double %5) {
|
||||
; CHECK-LABEL: reassociate_mamaa_double:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-QPX-DAG: fmadd [[REG0:[0-9]+]], 4, 3, 2
|
||||
; CHECK-QPX-DAG: fmadd [[REG1:[0-9]+]], 6, 5, 1
|
||||
; CHECK-QPX: fadd 1, [[REG0]], [[REG1]]
|
||||
; CHECK-PWR-DAG: xsmaddadp 1, 6, 5
|
||||
; CHECK-PWR-DAG: xsmaddadp 2, 4, 3
|
||||
; CHECK-PWR: xsadddp 1, 2, 1
|
||||
|
@ -250,9 +234,6 @@ define float @reassociate_mamaa_float(float %0, float %1, float %2, float %3, fl
|
|||
define <4 x float> @reassociate_mamaa_vec(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, <4 x float> %5) {
|
||||
; CHECK-LABEL: reassociate_mamaa_vec:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-QPX-DAG: qvfmadds [[REG0:[0-9]+]], 4, 3, 2
|
||||
; CHECK-QPX-DAG: qvfmadds [[REG1:[0-9]+]], 6, 5, 1
|
||||
; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
|
||||
; CHECK-PWR-DAG: xvmaddasp [[REG0:[0-9]+]], 39, 38
|
||||
; CHECK-PWR-DAG: xvmaddasp [[REG1:[0-9]+]], 37, 36
|
||||
; CHECK-PWR: xvaddsp 34, [[REG1]], [[REG0]]
|
||||
|
@ -268,11 +249,6 @@ define <4 x float> @reassociate_mamaa_vec(<4 x float> %0, <4 x float> %1, <4 x f
|
|||
define double @reassociate_mamama_double(double %0, double %1, double %2, double %3, double %4, double %5, double %6, double %7, double %8) {
|
||||
; CHECK-LABEL: reassociate_mamama_double:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-QPX: fmadd [[REG0:[0-9]+]], 2, 1, 7
|
||||
; CHECK-QPX-DAG: fmul [[REG1:[0-9]+]], 4, 3
|
||||
; CHECK-QPX-DAG: fmadd [[REG2:[0-9]+]], 6, 5, [[REG0]]
|
||||
; CHECK-QPX-DAG: fmadd [[REG3:[0-9]+]], 9, 8, [[REG1]]
|
||||
; CHECK-QPX: fadd 1, [[REG2]], [[REG3]]
|
||||
; CHECK-PWR: xsmaddadp 7, 2, 1
|
||||
; CHECK-PWR-DAG: xsmuldp [[REG0:[0-9]+]], 4, 3
|
||||
; CHECK-PWR-DAG: xsmaddadp 7, 6, 5
|
||||
|
|
|
@ -19,7 +19,7 @@ entry:
|
|||
|
||||
declare void @bar(double) #1
|
||||
|
||||
attributes #0 = { nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-qpx,-vsx" "unsafe-fp-math"="true" "use-soft-float"="false" }
|
||||
attributes #1 = { "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-qpx,-vsx" "unsafe-fp-math"="true" "use-soft-float"="false" }
|
||||
attributes #0 = { nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" "unsafe-fp-math"="true" "use-soft-float"="false" }
|
||||
attributes #1 = { "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" "unsafe-fp-math"="true" "use-soft-float"="false" }
|
||||
attributes #2 = { nounwind }
|
||||
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
; RUN: opt -ee-instrument < %s | opt -inline | llc | FileCheck %s
|
||||
; RUN: opt -ee-instrument < %s | opt -inline | llc -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
|
||||
|
||||
; The run-line mimics how Clang might run the instrumentation passes.
|
||||
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
|
||||
|
||||
define void @leaf_function() #0 {
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
; RUN: llc -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck %s -check-prefix=PWR7
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr8 < %s | FileCheck %s -check-prefix=PWR8
|
||||
; RUN: llc -verify-machineinstrs -mcpu=a2q < %s | FileCheck %s -check-prefix=A2Q
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
|
@ -25,12 +24,6 @@ entry:
|
|||
; PWR8: lxvw4x
|
||||
; PWR8: stxvw4x
|
||||
; PWR8: blr
|
||||
|
||||
; A2Q-LABEL: @foo1
|
||||
; A2Q-NOT: bl memcpy
|
||||
; A2Q: ld {{[0-9]+}}, {{[0-9]+}}(4)
|
||||
; A2Q: std {{[0-9]+}}, {{[0-9]+}}(3)
|
||||
; A2Q: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
|
@ -52,12 +45,6 @@ entry:
|
|||
; PWR8: lxvw4x
|
||||
; PWR8: stxvw4x
|
||||
; PWR8: blr
|
||||
|
||||
; A2Q-LABEL: @foo2
|
||||
; A2Q-NOT: bl memcpy
|
||||
; A2Q: ld {{[0-9]+}}, {{[0-9]+}}(4)
|
||||
; A2Q: std {{[0-9]+}}, {{[0-9]+}}(3)
|
||||
; A2Q: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
|
@ -76,11 +63,6 @@ entry:
|
|||
; PWR8-NOT: bl memset
|
||||
; PWR8: stxvw4x
|
||||
; PWR8: blr
|
||||
|
||||
; A2Q-LABEL: @bar1
|
||||
; A2Q-NOT: bl memset
|
||||
; A2Q: std {{[0-9]+}}, {{[0-9]+}}(3)
|
||||
; A2Q: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
|
@ -99,11 +81,6 @@ entry:
|
|||
; PWR8-NOT: bl memset
|
||||
; PWR8: stxvw4x
|
||||
; PWR8: blr
|
||||
|
||||
; A2Q-LABEL: @bar2
|
||||
; A2Q-NOT: bl memset
|
||||
; A2Q: qvstfdx
|
||||
; A2Q: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
|
|
|
@ -1,48 +0,0 @@
|
|||
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -O0 < %s | FileCheck %s -check-prefix=CHECK-O0
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define void @test_qpx() unnamed_addr #0 align 2 {
|
||||
entry:
|
||||
%0 = load i32, i32* undef, align 4
|
||||
%1 = trunc i32 %0 to i8
|
||||
call void @llvm.memset.p0i8.i64(i8* align 32 null, i8 %1, i64 64, i1 false)
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test_qpx
|
||||
; CHECK: qvstfdx
|
||||
; CHECK: qvstfdx
|
||||
; CHECK: blr
|
||||
|
||||
; CHECK-O0-LABEL: @test_qpx
|
||||
; CHECK-O0-NOT: qvstfdx
|
||||
; CHECK-O0: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #1
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define void @test_vsx() unnamed_addr #2 align 2 {
|
||||
entry:
|
||||
%0 = load i32, i32* undef, align 4
|
||||
%1 = trunc i32 %0 to i8
|
||||
call void @llvm.memset.p0i8.i64(i8* null, i8 %1, i64 32, i1 false)
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test_vsx
|
||||
; CHECK: stxvw4x
|
||||
; CHECK: stxvw4x
|
||||
; CHECK: blr
|
||||
|
||||
; CHECK-O0-LABEL: @test_vsx
|
||||
; CHECK-O0-NOT: stxvw4x
|
||||
; CHECK-O0: blr
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "target-cpu"="a2q" }
|
||||
attributes #1 = { nounwind }
|
||||
attributes #2 = { nounwind "target-cpu"="pwr7" }
|
||||
|
|
@ -1,8 +1,7 @@
|
|||
; RUN: llc -verify-machineinstrs < %s -enable-misched -pre-RA-sched=source -scheditins=false \
|
||||
; RUN: -disable-ifcvt-triangle-false -disable-post-ra | FileCheck %s
|
||||
; RUN: -disable-ifcvt-triangle-false -disable-post-ra -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
|
||||
;
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
|
||||
; %val1 is a load live out of %entry. It should be hoisted
|
||||
; above the add.
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
; RUN: llc < %s -enable-misched -verify-machineinstrs
|
||||
; PR14302
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
|
||||
@b = external global [16000 x double], align 32
|
||||
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
|
||||
declare zeroext i1 @ri1()
|
||||
declare void @se1()
|
||||
|
|
|
@ -92,7 +92,7 @@ entry:
|
|||
|
||||
; Left the target features in this test because it is important that caller has
|
||||
; -pcrelative-memops while callee has +pcrelative-memops
|
||||
attributes #0 = { nounwind "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+pcrelative-memops,+power8-vector,+power9-vector,+vsx,-htm,-qpx,-spe" }
|
||||
attributes #1 = { "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+pcrelative-memops,+power8-vector,+power9-vector,+vsx,-htm,-qpx,-spe" }
|
||||
attributes #2 = { nounwind "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+power9-vector,+vsx,-htm,-pcrelative-memops,-qpx,-spe" }
|
||||
attributes #0 = { nounwind "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+pcrelative-memops,+power8-vector,+power9-vector,+vsx,-htm,-spe" }
|
||||
attributes #1 = { "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+pcrelative-memops,+power8-vector,+power9-vector,+vsx,-htm,-spe" }
|
||||
attributes #2 = { nounwind "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+power9-vector,+vsx,-htm,-pcrelative-memops,-spe" }
|
||||
attributes #3 = { nounwind }
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mattr=+popcntd < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mattr=+slow-popcntd < %s | FileCheck %s --check-prefix=SLOWPC
|
||||
; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mcpu=pwr7 < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mcpu=a2q < %s | FileCheck %s --check-prefix=SLOWPC
|
||||
; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mcpu=a2q -mattr=+popcntd < %s | FileCheck %s
|
||||
|
||||
define i64 @_cntb64(i64 %x) nounwind readnone {
|
||||
%cnt = tail call i64 @llvm.ppc.popcntb(i64 %x)
|
||||
|
|
|
@ -105,14 +105,3 @@
|
|||
; STOP-AFTER-BRANCH-COALESCING-NOT: "ppc-branch-coalescing" pass is not registered.
|
||||
; STOP-AFTER-BRANCH-COALESCING: Branch Coalescing
|
||||
|
||||
|
||||
; Test pass name: ppc-qpx-load-splat.
|
||||
; RUN: llc -mtriple=powerpc64le-unknown-unknown < %s -debug-pass=Structure -stop-before=ppc-qpx-load-splat -o /dev/null 2>&1 | FileCheck %s -check-prefix=STOP-BEFORE-QPX-LOAD-SPLAT
|
||||
; STOP-BEFORE-QPX-LOAD-SPLAT-NOT: -ppc-qpx-load-splat
|
||||
; STOP-BEFORE-QPX-LOAD-SPLAT-NOT: "ppc-qpx-load-splat" pass is not registered.
|
||||
; STOP-BEFORE-QPX-LOAD-SPLAT-NOT: PowerPC QPX Load Splat Simplification
|
||||
|
||||
; RUN: llc -mtriple=powerpc64le-unknown-unknown < %s -debug-pass=Structure -stop-after=ppc-qpx-load-splat -o /dev/null 2>&1 | FileCheck %s -check-prefix=STOP-AFTER-QPX-LOAD-SPLAT
|
||||
; STOP-AFTER-QPX-LOAD-SPLAT: -ppc-qpx-load-splat
|
||||
; STOP-AFTER-QPX-LOAD-SPLAT-NOT: "ppc-qpx-load-splat" pass is not registered.
|
||||
; STOP-AFTER-QPX-LOAD-SPLAT: PowerPC QPX Load Splat Simplification
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK-SCO
|
||||
; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO-HASQPX
|
||||
; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO-HASQPX
|
||||
; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO
|
||||
; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO
|
||||
; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -code-model=small | FileCheck %s -check-prefix=SCM
|
||||
|
||||
; No combination of "powerpc64le-unknown-linux-gnu" + "CHECK-SCO", because
|
||||
|
@ -117,23 +117,6 @@ define void @caller_local_sret_32(%S_32* %a) #1 {
|
|||
attributes #0 = { noinline nounwind }
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
; vector <4 x i1> test
|
||||
|
||||
define void @callee_v4i1(i8 %a, <4 x i1> %b, <4 x i1> %c) { ret void }
|
||||
define void @caller_v4i1_reorder(i8 %a, <4 x i1> %b, <4 x i1> %c) {
|
||||
tail call void @callee_v4i1(i8 %a, <4 x i1> %c, <4 x i1> %b)
|
||||
ret void
|
||||
|
||||
; <4 x i1> is 32 bytes aligned, if subtarget doesn't support qpx, then we can't
|
||||
; place b, c to qpx register, so we can't do sco on caller_v4i1_reorder
|
||||
|
||||
; CHECK-SCO-LABEL: caller_v4i1_reorder:
|
||||
; CHECK-SCO: bl callee_v4i1
|
||||
|
||||
; CHECK-SCO-HASQPX-LABEL: caller_v4i1_reorder:
|
||||
; CHECK-SCO-HASQPX: b callee_v4i1
|
||||
}
|
||||
|
||||
define void @f128_callee(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b) { ret void }
|
||||
define void @f128_caller(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b) {
|
||||
tail call void @f128_callee(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b)
|
||||
|
|
|
@ -47,8 +47,8 @@ declare double @pow(double, double) #0
|
|||
; Function Attrs: nounwind readnone
|
||||
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
|
||||
|
||||
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #2 = { nounwind readnone }
|
||||
attributes #3 = { nounwind }
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ entry:
|
|||
declare fastcc void @bar([2 x i64], [2 x i64]) unnamed_addr #1 align 2
|
||||
|
||||
attributes #0 = { argmemonly nounwind }
|
||||
attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #2 = { nounwind }
|
||||
|
||||
!llvm.ident = !{!0}
|
||||
|
|
|
@ -67,4 +67,4 @@ bb:
|
|||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
target triple = "powerpc64le-unknown-linux"
|
||||
|
||||
%t1 = type { %t2*, %t3* }
|
||||
%t2 = type <{ %t3*, i32, [4 x i8] }>
|
||||
|
|
|
@ -1,33 +0,0 @@
|
|||
; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
|
||||
define void @s452(i32 %inp1) nounwind {
|
||||
entry:
|
||||
br label %for.body4
|
||||
|
||||
for.body4: ; preds = %for.body4, %entry
|
||||
%conv.4 = sitofp i32 %inp1 to double
|
||||
%conv.5 = sitofp i32 %inp1 to double
|
||||
%mul.4.v.i0.1 = insertelement <2 x double> undef, double %conv.4, i32 0
|
||||
%v = insertelement <2 x double> %mul.4.v.i0.1, double %conv.5, i32 1
|
||||
%vv = fmul <2 x double> %v, %v
|
||||
%add7.4 = fadd <2 x double> %vv, %vv
|
||||
store <2 x double> %add7.4, <2 x double>* undef, align 16
|
||||
br i1 undef, label %for.end, label %for.body4
|
||||
|
||||
for.end: ; preds = %for.body4
|
||||
unreachable
|
||||
; CHECK-LABEL: @s452
|
||||
; CHECK: lfiwax [[REG1:[0-9]+]],
|
||||
; CHECK: fcfid [[REG2:[0-9]+]], [[REG1]]
|
||||
; FIXME: We could 'promote' this to a vector earlier and remove this splat.
|
||||
; CHECK: qvesplati {{[0-9]+}}, [[REG2]], 0
|
||||
; CHECK: qvfmul
|
||||
; CHECK: qvfadd
|
||||
; CHECK: qvesplati {{[0-9]+}},
|
||||
; FIXME: We can use qvstfcdx here instead of two stores.
|
||||
; CHECK: stfd
|
||||
; CHECK: stfd
|
||||
}
|
||||
|
|
@ -1,37 +0,0 @@
|
|||
; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
|
||||
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
|
||||
define <4 x double> @foo(double %f1, double %f2, double %f3, double %f4) {
|
||||
%v1 = insertelement <4 x double> undef, double %f1, i32 0
|
||||
%v2 = insertelement <4 x double> %v1, double %f2, i32 1
|
||||
%v3 = insertelement <4 x double> %v2, double %f3, i32 2
|
||||
%v4 = insertelement <4 x double> %v3, double %f4, i32 3
|
||||
ret <4 x double> %v4
|
||||
|
||||
; CHECK-LABEL: @foo
|
||||
; CHECK: qvgpci [[REG1:[0-9]+]], 275
|
||||
; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101
|
||||
; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]]
|
||||
; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]]
|
||||
; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <4 x float> @goo(float %f1, float %f2, float %f3, float %f4) {
|
||||
%v1 = insertelement <4 x float> undef, float %f1, i32 0
|
||||
%v2 = insertelement <4 x float> %v1, float %f2, i32 1
|
||||
%v3 = insertelement <4 x float> %v2, float %f3, i32 2
|
||||
%v4 = insertelement <4 x float> %v3, float %f4, i32 3
|
||||
ret <4 x float> %v4
|
||||
|
||||
; CHECK-LABEL: @goo
|
||||
; CHECK: qvgpci [[REG1:[0-9]+]], 275
|
||||
; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101
|
||||
; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]]
|
||||
; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]]
|
||||
; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
|
@ -1,22 +0,0 @@
|
|||
; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
|
||||
declare <4 x double> @foo(<4 x double> %p)
|
||||
|
||||
define <4 x double> @bar(<4 x double> %p, <4 x double> %q) {
|
||||
entry:
|
||||
%v = call <4 x double> @foo(<4 x double> %p)
|
||||
%w = call <4 x double> @foo(<4 x double> %q)
|
||||
%x = fadd <4 x double> %v, %w
|
||||
ret <4 x double> %x
|
||||
|
||||
; CHECK-LABEL: @bar
|
||||
; CHECK: qvstfdx 2,
|
||||
; CHECK: bl foo
|
||||
; CHECK: qvstfdx 1,
|
||||
; CHECK: qvlfdx 1,
|
||||
; CHECK: bl foo
|
||||
; CHECK: qvlfdx [[REG:[0-9]+]],
|
||||
; CHECK: qvfadd 1, [[REG]], 1
|
||||
}
|
||||
|
|
@ -1,80 +0,0 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
|
||||
; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
; Function Attrs: norecurse nounwind readonly
|
||||
; Loads one double from %a and replicates it into all four lanes of a
; <4 x double> (insertelement into lane 0, then a shufflevector with an
; all-zero mask). Expected code: one lxvdsx into v2 and a vmr copying v2 to v3.
define <4 x double> @foo(double* nocapture readonly %a) #0 {
|
||||
; CHECK-LABEL: foo:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxvdsx v2, 0, r3
|
||||
; CHECK-NEXT: vmr v3, v2
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%0 = load double, double* %a, align 8
|
||||
%vecinit.i = insertelement <4 x double> undef, double %0, i32 0
|
||||
%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x double> %shuffle.i
|
||||
}
|
||||
|
||||
; Same splat as a plain load-and-replicate, but the double is loaded from
; %a indexed by %idx (getelementptr). Expected code: shift the index left by 3,
; then an indexed lxvdsx into v2 and a vmr copying v2 to v3.
define <4 x double> @foox(double* nocapture readonly %a, i64 %idx) #0 {
|
||||
; CHECK-LABEL: foox:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sldi r4, r4, 3
|
||||
; CHECK-NEXT: lxvdsx v2, r3, r4
|
||||
; CHECK-NEXT: vmr v3, v2
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%p = getelementptr double, double* %a, i64 %idx
|
||||
%0 = load double, double* %p, align 8
|
||||
%vecinit.i = insertelement <4 x double> undef, double %0, i32 0
|
||||
%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x double> %shuffle.i
|
||||
}
|
||||
|
||||
; Indexed load-and-splat of a double that additionally stores the computed
; element pointer %p through %pptr. Expected code: the address is formed with
; add and written with std, while the load itself is still an indexed lxvdsx
; followed by a vmr copying v2 to v3.
define <4 x double> @fooxu(double* nocapture readonly %a, i64 %idx, double** %pptr) #0 {
|
||||
; CHECK-LABEL: fooxu:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sldi r4, r4, 3
|
||||
; CHECK-NEXT: add r6, r3, r4
|
||||
; CHECK-NEXT: std r6, 0(r5)
|
||||
; CHECK-NEXT: lxvdsx v2, r3, r4
|
||||
; CHECK-NEXT: vmr v3, v2
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%p = getelementptr double, double* %a, i64 %idx
|
||||
%0 = load double, double* %p, align 8
|
||||
%vecinit.i = insertelement <4 x double> undef, double %0, i32 0
|
||||
%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
|
||||
store double* %p, double** %pptr, align 8
|
||||
ret <4 x double> %shuffle.i
|
||||
}
|
||||
|
||||
; Loads one float from %a and replicates it into all four lanes of a
; <4 x float> (insertelement into lane 0, then a shufflevector with an
; all-zero mask). Expected code: lfiwzx into f0 followed by xxspltw into v2.
define <4 x float> @foof(float* nocapture readonly %a) #0 {
|
||||
; CHECK-LABEL: foof:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lfiwzx f0, 0, r3
|
||||
; CHECK-NEXT: xxspltw v2, vs0, 1
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%0 = load float, float* %a, align 4
|
||||
%vecinit.i = insertelement <4 x float> undef, float %0, i32 0
|
||||
%shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x float> %shuffle.i
|
||||
}
|
||||
|
||||
; Float load-and-splat where the element is addressed as %a indexed by %idx.
; Expected code: shift the index left by 2, an indexed lfiwzx into f0, then
; xxspltw into v2.
define <4 x float> @foofx(float* nocapture readonly %a, i64 %idx) #0 {
|
||||
; CHECK-LABEL: foofx:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sldi r4, r4, 2
|
||||
; CHECK-NEXT: lfiwzx f0, r3, r4
|
||||
; CHECK-NEXT: xxspltw v2, vs0, 1
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%p = getelementptr float, float* %a, i64 %idx
|
||||
%0 = load float, float* %p, align 4
|
||||
%vecinit.i = insertelement <4 x float> undef, float %0, i32 0
|
||||
%shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x float> %shuffle.i
|
||||
}
|
||||
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
|
||||
; Loads a <4 x double> through %p with only 8-byte alignment and returns it.
; The expectations that follow this function look for two qvlfdx loads (one
; using an offset register set to 31), a qvlpcldx, and a qvfperm that combines
; them into register 1.
define <4 x double> @foo(<4 x double>* %p) {
|
||||
entry:
|
||||
%v = load <4 x double>, <4 x double>* %p, align 8
|
||||
ret <4 x double> %v
|
||||
}
|
||||
|
||||
; CHECK: @foo
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 31
|
||||
; CHECK-DAG: qvlfdx [[REG4:[0-9]+]], 0, 3
|
||||
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], 3, [[REG1]]
|
||||
; CHECK-DAG: qvlpcldx [[REG3:[0-9]+]], 0, 3
|
||||
; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]]
|
||||
; CHECK: blr
|
||||
|
||||
; Loads a <4 x double> through %p with 32-byte alignment and returns it.
; The expectations that follow this function only require a qvlfdx.
define <4 x double> @bar(<4 x double>* %p) {
|
||||
entry:
|
||||
%v = load <4 x double>, <4 x double>* %p, align 32
|
||||
ret <4 x double> %v
|
||||
}
|
||||
|
||||
; CHECK: @bar
|
||||
; CHECK: qvlfdx
|
||||
|
|
@ -1,79 +0,0 @@
|
|||
; RUN: llc -verify-machineinstrs -stop-after=finalize-isel < %s -mcpu=a2q | FileCheck %s
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
|
||||
; Multiplies %2 by %1 and adds %0 on <2 x double>, with reassoc and nsz on both
; operations. The llc invocation at the top of this file stops after
; finalize-isel, so the expectation is a single QVFMADD machine instruction.
define <2 x double> @test_qvfmadd(<2 x double> %0, <2 x double> %1, <2 x double> %2) {
|
||||
; CHECK: test_qvfmadd
|
||||
; CHECK: QVFMADD %2, %1, %0, implicit $rm
|
||||
;
|
||||
%4 = fmul reassoc nsz <2 x double> %2, %1
|
||||
%5 = fadd reassoc nsz <2 x double> %4, %0
|
||||
ret <2 x double> %5
|
||||
}
|
||||
|
||||
; <4 x float> multiply-then-add with reassoc and nsz on both operations.
; Expected to be selected as a single QVFMADDSs machine instruction.
define <4 x float> @test_qvfmadds(<4 x float> %0, <4 x float> %1, <4 x float> %2) {
|
||||
; CHECK: test_qvfmadds
|
||||
; CHECK: QVFMADDSs %2, %1, %0, implicit $rm
|
||||
;
|
||||
%4 = fmul reassoc nsz <4 x float> %2, %1
|
||||
%5 = fadd reassoc nsz <4 x float> %4, %0
|
||||
ret <4 x float> %5
|
||||
}
|
||||
|
||||
; <2 x double> multiply, add, then negate the sum (all with reassoc and nsz).
; Expected to be selected as a single QVFNMADD machine instruction.
define <2 x double> @test_qvfnmadd(<2 x double> %0, <2 x double> %1, <2 x double> %2) {
|
||||
; CHECK: test_qvfnmadd
|
||||
; CHECK: QVFNMADD %2, %1, %0, implicit $rm
|
||||
;
|
||||
%4 = fmul reassoc nsz <2 x double> %2, %1
|
||||
%5 = fadd reassoc nsz <2 x double> %4, %0
|
||||
%6 = fneg reassoc nsz <2 x double> %5
|
||||
ret <2 x double> %6
|
||||
}
|
||||
|
||||
; <4 x float> multiply, add, then negate the sum (all with reassoc and nsz).
; Expected to be selected as a single QVFNMADDSs machine instruction.
define <4 x float> @test_qvfnmadds(<4 x float> %0, <4 x float> %1, <4 x float> %2) {
|
||||
; CHECK: test_qvfnmadds
|
||||
; CHECK: QVFNMADDSs %2, %1, %0, implicit $rm
|
||||
;
|
||||
%4 = fmul reassoc nsz <4 x float> %2, %1
|
||||
%5 = fadd reassoc nsz <4 x float> %4, %0
|
||||
%6 = fneg reassoc nsz <4 x float> %5
|
||||
ret <4 x float> %6
|
||||
}
|
||||
|
||||
; <2 x double> multiply of %2 and %1 followed by subtraction of %0 (both with
; reassoc and nsz). Expected to be selected as a single QVFMSUB machine
; instruction.
define <2 x double> @test_qvfmsub(<2 x double> %0, <2 x double> %1, <2 x double> %2) {
|
||||
; CHECK: test_qvfmsub
|
||||
; CHECK: QVFMSUB %2, %1, %0, implicit $rm
|
||||
;
|
||||
%4 = fmul reassoc nsz <2 x double> %2, %1
|
||||
%5 = fsub reassoc nsz <2 x double> %4, %0
|
||||
ret <2 x double> %5
|
||||
}
|
||||
|
||||
; <4 x float> multiply of %2 and %1 followed by subtraction of %0 (both with
; reassoc and nsz). Expected to be selected as a single QVFMSUBSs machine
; instruction.
define <4 x float> @test_qvfmsubs(<4 x float> %0, <4 x float> %1, <4 x float> %2) {
|
||||
; CHECK: test_qvfmsubs
|
||||
; CHECK: QVFMSUBSs %2, %1, %0, implicit $rm
|
||||
;
|
||||
%4 = fmul reassoc nsz <4 x float> %2, %1
|
||||
%5 = fsub reassoc nsz <4 x float> %4, %0
|
||||
ret <4 x float> %5
|
||||
}
|
||||
|
||||
; <2 x double> multiply, subtract %0, then negate the difference (all with
; reassoc and nsz). Expected to be selected as a single QVFNMSUB machine
; instruction.
define <2 x double> @test_qvfnmsub(<2 x double> %0, <2 x double> %1, <2 x double> %2) {
|
||||
; CHECK: test_qvfnmsub
|
||||
; CHECK: QVFNMSUB %2, %1, %0, implicit $rm
|
||||
;
|
||||
%4 = fmul reassoc nsz <2 x double> %2, %1
|
||||
%5 = fsub reassoc nsz <2 x double> %4, %0
|
||||
%6 = fneg reassoc nsz <2 x double> %5
|
||||
ret <2 x double> %6
|
||||
}
|
||||
|
||||
; <4 x float> multiply, subtract %0, then negate the difference (all with
; reassoc and nsz). Expected to be selected as a single QVFNMSUBSs machine
; instruction.
define <4 x float> @test_qvfnmsubs(<4 x float> %0, <4 x float> %1, <4 x float> %2) {
|
||||
; CHECK: test_qvfnmsubs
|
||||
; CHECK: QVFNMSUBSs %2, %1, %0, implicit $rm
|
||||
;
|
||||
%4 = fmul reassoc nsz <4 x float> %2, %1
|
||||
%5 = fsub reassoc nsz <4 x float> %4, %0
|
||||
%6 = fneg reassoc nsz <4 x float> %5
|
||||
ret <4 x float> %6
|
||||
}
|
||||
|
|
@ -1,473 +0,0 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck %s
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
|
||||
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
|
||||
|
||||
; Computes %a / sqrt(%b) on <4 x double>. The sqrt call carries ninf, afn and
; reassoc; the fdiv carries arcp and reassoc. The expected code contains no
; fsqrt or fdiv: it starts from qvfrsqrte, applies a sequence of
; qvfmul/qvfmsub/qvfnmsub steps, and finishes with a qvfmul by %a's register.
; NOTE(review): the qvfmul/qvfnmsub sequence is presumably iterative refinement
; of the estimate - confirm.
define <4 x double> @foo_fmf(<4 x double> %a, <4 x double> %b) nounwind {
|
||||
; CHECK-LABEL: foo_fmf:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
|
||||
; CHECK-NEXT: qvfrsqrte 3, 2
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l
|
||||
; CHECK-NEXT: qvlfdx 0, 0, 3
|
||||
; CHECK-NEXT: qvfmul 4, 3, 3
|
||||
; CHECK-NEXT: qvfmsub 2, 2, 0, 2
|
||||
; CHECK-NEXT: qvfnmsub 4, 2, 4, 0
|
||||
; CHECK-NEXT: qvfmul 3, 3, 4
|
||||
; CHECK-NEXT: qvfmul 4, 3, 3
|
||||
; CHECK-NEXT: qvfnmsub 0, 2, 4, 0
|
||||
; CHECK-NEXT: qvfmul 0, 3, 0
|
||||
; CHECK-NEXT: qvfmul 1, 1, 0
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%x = call ninf afn reassoc <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
|
||||
%r = fdiv arcp reassoc <4 x double> %a, %x
|
||||
ret <4 x double> %r
|
||||
}
|
||||
|
||||
; Computes %a / sqrt(%b) on <4 x double> with no fast-math flags. The expected
; code is scalarized: qvesplati extracts lanes, four scalar fsqrt and four
; scalar fdiv do the arithmetic, and qvgpci/qvfperm rebuild the result vector
; in register 1.
define <4 x double> @foo_safe(<4 x double> %a, <4 x double> %b) nounwind {
|
||||
; CHECK-LABEL: foo_safe:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: qvesplati 5, 2, 3
|
||||
; CHECK-NEXT: qvesplati 3, 2, 1
|
||||
; CHECK-NEXT: qvesplati 4, 2, 2
|
||||
; CHECK-NEXT: fsqrt 2, 2
|
||||
; CHECK-NEXT: fsqrt 5, 5
|
||||
; CHECK-NEXT: fsqrt 4, 4
|
||||
; CHECK-NEXT: fsqrt 3, 3
|
||||
; CHECK-NEXT: qvesplati 6, 1, 3
|
||||
; CHECK-NEXT: qvgpci 0, 275
|
||||
; CHECK-NEXT: fdiv 2, 1, 2
|
||||
; CHECK-NEXT: fdiv 5, 6, 5
|
||||
; CHECK-NEXT: qvesplati 6, 1, 2
|
||||
; CHECK-NEXT: qvesplati 1, 1, 1
|
||||
; CHECK-NEXT: fdiv 4, 6, 4
|
||||
; CHECK-NEXT: fdiv 1, 1, 3
|
||||
; CHECK-NEXT: qvfperm 3, 4, 5, 0
|
||||
; CHECK-NEXT: qvfperm 0, 2, 1, 0
|
||||
; CHECK-NEXT: qvgpci 1, 101
|
||||
; CHECK-NEXT: qvfperm 1, 0, 3, 1
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
|
||||
%r = fdiv <4 x double> %a, %x
|
||||
ret <4 x double> %r
|
||||
}
|
||||
|
||||
; Takes sqrt of the <4 x float> %b (afn, ninf, reassoc), extends the result to
; <4 x double>, and divides %a by it (arcp, reassoc, nsz). The expected code
; starts from the single-precision estimate qvfrsqrtes, applies
; qvfmuls/qvfmsubs/qvfnmsubs steps, and finishes with a double-precision
; qvfmul; no fsqrt or fdiv appears.
define <4 x double> @foof_fmf(<4 x double> %a, <4 x float> %b) nounwind {
|
||||
; CHECK-LABEL: foof_fmf:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI2_0@toc@ha
|
||||
; CHECK-NEXT: qvfrsqrtes 3, 2
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI2_0@toc@l
|
||||
; CHECK-NEXT: qvlfsx 0, 0, 3
|
||||
; CHECK-NEXT: qvfmuls 4, 3, 3
|
||||
; CHECK-NEXT: qvfmsubs 2, 2, 0, 2
|
||||
; CHECK-NEXT: qvfnmsubs 0, 2, 4, 0
|
||||
; CHECK-NEXT: qvfmuls 0, 3, 0
|
||||
; CHECK-NEXT: qvfmul 1, 1, 0
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%x = call afn ninf reassoc <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
|
||||
%y = fpext <4 x float> %x to <4 x double>
|
||||
%r = fdiv arcp reassoc nsz <4 x double> %a, %y
|
||||
ret <4 x double> %r
|
||||
}
|
||||
|
||||
; Same computation as the fast-math float-sqrt/double-divide case but with no
; fast-math flags: sqrt of <4 x float> %b, fpext to <4 x double>, then
; %a / result. The expected code is scalarized: four fsqrts, a qvfperm rebuild
; of the root vector, four scalar fdiv, and a final qvfperm rebuild into
; register 1.
define <4 x double> @foof_safe(<4 x double> %a, <4 x float> %b) nounwind {
|
||||
; CHECK-LABEL: foof_safe:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: qvesplati 0, 2, 3
|
||||
; CHECK-NEXT: qvesplati 3, 2, 2
|
||||
; CHECK-NEXT: fsqrts 4, 2
|
||||
; CHECK-NEXT: qvesplati 2, 2, 1
|
||||
; CHECK-NEXT: fsqrts 0, 0
|
||||
; CHECK-NEXT: fsqrts 3, 3
|
||||
; CHECK-NEXT: fsqrts 2, 2
|
||||
; CHECK-NEXT: qvgpci 5, 275
|
||||
; CHECK-NEXT: qvgpci 6, 101
|
||||
; CHECK-NEXT: qvfperm 0, 3, 0, 5
|
||||
; CHECK-NEXT: qvesplati 3, 1, 2
|
||||
; CHECK-NEXT: qvfperm 2, 4, 2, 5
|
||||
; CHECK-NEXT: qvfperm 0, 2, 0, 6
|
||||
; CHECK-NEXT: qvesplati 2, 1, 3
|
||||
; CHECK-NEXT: qvesplati 4, 0, 3
|
||||
; CHECK-NEXT: fdiv 2, 2, 4
|
||||
; CHECK-NEXT: qvesplati 4, 0, 2
|
||||
; CHECK-NEXT: fdiv 3, 3, 4
|
||||
; CHECK-NEXT: qvesplati 4, 1, 1
|
||||
; CHECK-NEXT: fdiv 1, 1, 0
|
||||
; CHECK-NEXT: qvesplati 0, 0, 1
|
||||
; CHECK-NEXT: fdiv 0, 4, 0
|
||||
; CHECK-NEXT: qvfperm 2, 3, 2, 5
|
||||
; CHECK-NEXT: qvfperm 0, 1, 0, 5
|
||||
; CHECK-NEXT: qvfperm 1, 0, 2, 6
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
|
||||
%y = fpext <4 x float> %x to <4 x double>
|
||||
%r = fdiv <4 x double> %a, %y
|
||||
ret <4 x double> %r
|
||||
}
|
||||
|
||||
; Takes sqrt of the <4 x double> %b (afn, ninf, reassoc), truncates the result
; to <4 x float>, and divides %a by it (arcp, reassoc). The expected code
; starts from qvfrsqrte, applies qvfmul/qvfmsub/qvfnmsub steps, rounds with
; qvfrsp, and finishes with a single-precision qvfmuls; no fsqrt or fdiv
; appears.
define <4 x float> @food_fmf(<4 x float> %a, <4 x double> %b) nounwind {
|
||||
; CHECK-LABEL: food_fmf:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI4_0@toc@ha
|
||||
; CHECK-NEXT: qvfrsqrte 3, 2
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI4_0@toc@l
|
||||
; CHECK-NEXT: qvlfdx 0, 0, 3
|
||||
; CHECK-NEXT: qvfmul 4, 3, 3
|
||||
; CHECK-NEXT: qvfmsub 2, 2, 0, 2
|
||||
; CHECK-NEXT: qvfnmsub 4, 2, 4, 0
|
||||
; CHECK-NEXT: qvfmul 3, 3, 4
|
||||
; CHECK-NEXT: qvfmul 4, 3, 3
|
||||
; CHECK-NEXT: qvfnmsub 0, 2, 4, 0
|
||||
; CHECK-NEXT: qvfmul 0, 3, 0
|
||||
; CHECK-NEXT: qvfrsp 0, 0
|
||||
; CHECK-NEXT: qvfmuls 1, 1, 0
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%x = call afn ninf reassoc <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
|
||||
%y = fptrunc <4 x double> %x to <4 x float>
|
||||
%r = fdiv arcp reassoc <4 x float> %a, %y
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
; sqrt of <4 x double> %b, fptrunc to <4 x float>, then %a / result, all with
; no fast-math flags. The expected code is scalarized: four fsqrt, a qvfperm
; rebuild followed by qvfrsp, four scalar fdivs, and a final qvfperm rebuild
; into register 1.
define <4 x float> @food_safe(<4 x float> %a, <4 x double> %b) nounwind {
|
||||
; CHECK-LABEL: food_safe:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: qvesplati 0, 2, 3
|
||||
; CHECK-NEXT: qvesplati 3, 2, 2
|
||||
; CHECK-NEXT: fsqrt 4, 2
|
||||
; CHECK-NEXT: qvesplati 2, 2, 1
|
||||
; CHECK-NEXT: fsqrt 0, 0
|
||||
; CHECK-NEXT: fsqrt 3, 3
|
||||
; CHECK-NEXT: fsqrt 2, 2
|
||||
; CHECK-NEXT: qvgpci 5, 275
|
||||
; CHECK-NEXT: qvgpci 6, 101
|
||||
; CHECK-NEXT: qvfperm 0, 3, 0, 5
|
||||
; CHECK-NEXT: qvesplati 3, 1, 2
|
||||
; CHECK-NEXT: qvfperm 2, 4, 2, 5
|
||||
; CHECK-NEXT: qvfperm 0, 2, 0, 6
|
||||
; CHECK-NEXT: qvesplati 2, 1, 3
|
||||
; CHECK-NEXT: qvfrsp 0, 0
|
||||
; CHECK-NEXT: qvesplati 4, 0, 3
|
||||
; CHECK-NEXT: fdivs 2, 2, 4
|
||||
; CHECK-NEXT: qvesplati 4, 0, 2
|
||||
; CHECK-NEXT: fdivs 3, 3, 4
|
||||
; CHECK-NEXT: qvesplati 4, 1, 1
|
||||
; CHECK-NEXT: fdivs 1, 1, 0
|
||||
; CHECK-NEXT: qvesplati 0, 0, 1
|
||||
; CHECK-NEXT: fdivs 0, 4, 0
|
||||
; CHECK-NEXT: qvfperm 2, 3, 2, 5
|
||||
; CHECK-NEXT: qvfperm 0, 1, 0, 5
|
||||
; CHECK-NEXT: qvfperm 1, 0, 2, 6
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
|
||||
%y = fptrunc <4 x double> %x to <4 x float>
|
||||
%r = fdiv <4 x float> %a, %y
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
; Computes %a / sqrt(%b) on <4 x float>. The sqrt call carries afn, ninf and
; reassoc; the fdiv carries arcp, reassoc and nsz. The expected code starts
; from qvfrsqrtes, applies qvfmuls/qvfmsubs/qvfnmsubs steps, and finishes with
; a qvfmuls by %a's register; no fsqrts or fdivs appears.
define <4 x float> @goo_fmf(<4 x float> %a, <4 x float> %b) nounwind {
|
||||
; CHECK-LABEL: goo_fmf:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI6_0@toc@ha
|
||||
; CHECK-NEXT: qvfrsqrtes 3, 2
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI6_0@toc@l
|
||||
; CHECK-NEXT: qvlfsx 0, 0, 3
|
||||
; CHECK-NEXT: qvfmuls 4, 3, 3
|
||||
; CHECK-NEXT: qvfmsubs 2, 2, 0, 2
|
||||
; CHECK-NEXT: qvfnmsubs 0, 2, 4, 0
|
||||
; CHECK-NEXT: qvfmuls 0, 3, 0
|
||||
; CHECK-NEXT: qvfmuls 1, 1, 0
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%x = call afn ninf reassoc <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
|
||||
%r = fdiv arcp reassoc nsz <4 x float> %a, %x
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
; Computes %a / sqrt(%b) on <4 x float> with no fast-math flags. The expected
; code is scalarized: qvesplati extracts lanes, four fsqrts and four fdivs do
; the arithmetic, and qvgpci/qvfperm rebuild the result vector in register 1.
define <4 x float> @goo_safe(<4 x float> %a, <4 x float> %b) nounwind {
|
||||
; CHECK-LABEL: goo_safe:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: qvesplati 5, 2, 3
|
||||
; CHECK-NEXT: qvesplati 3, 2, 1
|
||||
; CHECK-NEXT: qvesplati 4, 2, 2
|
||||
; CHECK-NEXT: fsqrts 2, 2
|
||||
; CHECK-NEXT: fsqrts 5, 5
|
||||
; CHECK-NEXT: fsqrts 4, 4
|
||||
; CHECK-NEXT: fsqrts 3, 3
|
||||
; CHECK-NEXT: qvesplati 6, 1, 3
|
||||
; CHECK-NEXT: qvgpci 0, 275
|
||||
; CHECK-NEXT: fdivs 2, 1, 2
|
||||
; CHECK-NEXT: fdivs 5, 6, 5
|
||||
; CHECK-NEXT: qvesplati 6, 1, 2
|
||||
; CHECK-NEXT: qvesplati 1, 1, 1
|
||||
; CHECK-NEXT: fdivs 4, 6, 4
|
||||
; CHECK-NEXT: fdivs 1, 1, 3
|
||||
; CHECK-NEXT: qvfperm 3, 4, 5, 0
|
||||
; CHECK-NEXT: qvfperm 0, 2, 1, 0
|
||||
; CHECK-NEXT: qvgpci 1, 101
|
||||
; CHECK-NEXT: qvfperm 1, 0, 3, 1
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
|
||||
%r = fdiv <4 x float> %a, %x
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
; Divides %a by %b on <4 x double> with arcp, reassoc, nsz and ninf. The
; expected code contains no fdiv: it starts from the qvfre estimate of %b and
; combines it with %a through qvfmadd/qvfnmsub/qvfmul steps.
define <4 x double> @foo2_fmf(<4 x double> %a, <4 x double> %b) nounwind {
|
||||
; CHECK-LABEL: foo2_fmf:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI8_0@toc@ha
|
||||
; CHECK-NEXT: qvfre 3, 2
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI8_0@toc@l
|
||||
; CHECK-NEXT: qvlfdx 0, 0, 3
|
||||
; CHECK-NEXT: qvfmadd 0, 2, 3, 0
|
||||
; CHECK-NEXT: qvfnmsub 0, 3, 0, 3
|
||||
; CHECK-NEXT: qvfmul 3, 1, 0
|
||||
; CHECK-NEXT: qvfnmsub 1, 2, 3, 1
|
||||
; CHECK-NEXT: qvfmadd 1, 0, 1, 3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%r = fdiv arcp reassoc nsz ninf <4 x double> %a, %b
|
||||
ret <4 x double> %r
|
||||
}
|
||||
|
||||
; Divides %a by %b on <4 x double> with no fast-math flags. The body has no
; named entry block, and the block-header expectation accordingly carries no
; "%entry" suffix. The expected code is scalarized into four fdiv, with
; qvesplati lane extraction and a qvgpci/qvfperm rebuild into register 1.
define <4 x double> @foo2_safe(<4 x double> %a, <4 x double> %b) nounwind {
|
||||
; CHECK-LABEL: foo2_safe:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: qvesplati 3, 2, 3
|
||||
; CHECK-NEXT: qvesplati 4, 1, 3
|
||||
; CHECK-NEXT: qvesplati 5, 2, 2
|
||||
; CHECK-NEXT: qvgpci 0, 275
|
||||
; CHECK-NEXT: fdiv 3, 4, 3
|
||||
; CHECK-NEXT: qvesplati 4, 1, 2
|
||||
; CHECK-NEXT: fdiv 4, 4, 5
|
||||
; CHECK-NEXT: fdiv 5, 1, 2
|
||||
; CHECK-NEXT: qvesplati 2, 2, 1
|
||||
; CHECK-NEXT: qvesplati 1, 1, 1
|
||||
; CHECK-NEXT: fdiv 1, 1, 2
|
||||
; CHECK-NEXT: qvfperm 2, 4, 3, 0
|
||||
; CHECK-NEXT: qvfperm 0, 5, 1, 0
|
||||
; CHECK-NEXT: qvgpci 1, 101
|
||||
; CHECK-NEXT: qvfperm 1, 0, 2, 1
|
||||
; CHECK-NEXT: blr
|
||||
%r = fdiv <4 x double> %a, %b
|
||||
ret <4 x double> %r
|
||||
}
|
||||
|
||||
; Divides %a by %b on <4 x float> with arcp, reassoc and ninf. The expected
; code contains no fdivs: it starts from the qvfres estimate of %b and combines
; it with %a through qvfmuls, qvfnmsubs and qvfmadds.
define <4 x float> @goo2_fmf(<4 x float> %a, <4 x float> %b) nounwind {
|
||||
; CHECK-LABEL: goo2_fmf:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: qvfres 0, 2
|
||||
; CHECK-NEXT: qvfmuls 3, 1, 0
|
||||
; CHECK-NEXT: qvfnmsubs 1, 2, 3, 1
|
||||
; CHECK-NEXT: qvfmadds 1, 0, 1, 3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%r = fdiv arcp reassoc ninf <4 x float> %a, %b
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
; Divides %a by %b on <4 x float> with no fast-math flags. The expected code
; is scalarized into four fdivs, with qvesplati lane extraction and a
; qvgpci/qvfperm rebuild into register 1.
define <4 x float> @goo2_safe(<4 x float> %a, <4 x float> %b) nounwind {
|
||||
; CHECK-LABEL: goo2_safe:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: qvesplati 3, 2, 3
|
||||
; CHECK-NEXT: qvesplati 4, 1, 3
|
||||
; CHECK-NEXT: qvesplati 5, 2, 2
|
||||
; CHECK-NEXT: qvgpci 0, 275
|
||||
; CHECK-NEXT: fdivs 3, 4, 3
|
||||
; CHECK-NEXT: qvesplati 4, 1, 2
|
||||
; CHECK-NEXT: fdivs 4, 4, 5
|
||||
; CHECK-NEXT: fdivs 5, 1, 2
|
||||
; CHECK-NEXT: qvesplati 2, 2, 1
|
||||
; CHECK-NEXT: qvesplati 1, 1, 1
|
||||
; CHECK-NEXT: fdivs 1, 1, 2
|
||||
; CHECK-NEXT: qvfperm 2, 4, 3, 0
|
||||
; CHECK-NEXT: qvfperm 0, 5, 1, 0
|
||||
; CHECK-NEXT: qvgpci 1, 101
|
||||
; CHECK-NEXT: qvfperm 1, 0, 2, 1
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%r = fdiv <4 x float> %a, %b
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
; sqrt of <4 x double> %a with reassoc, ninf and afn, under attribute group #0
; (declared at the end of this file with "denormal-fp-math"="ieee,ieee").
; The expected code starts from qvfrsqrte, applies qvfmul/qvfmsub/qvfnmsub
; steps, multiplies by the input, and then selects the final value with
; qvfabs + qvfcmplt + qvfsel using two further constants loaded by qvlfdx.
define <4 x double> @foo3_fmf_denorm_on(<4 x double> %a) #0 {
|
||||
; CHECK-LABEL: foo3_fmf_denorm_on:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI12_0@toc@ha
|
||||
; CHECK-NEXT: qvfrsqrte 0, 1
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI12_0@toc@l
|
||||
; CHECK-NEXT: qvlfdx 2, 0, 3
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI12_1@toc@ha
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI12_1@toc@l
|
||||
; CHECK-NEXT: qvfmul 3, 0, 0
|
||||
; CHECK-NEXT: qvfmsub 4, 1, 2, 1
|
||||
; CHECK-NEXT: qvfnmsub 3, 4, 3, 2
|
||||
; CHECK-NEXT: qvfmul 0, 0, 3
|
||||
; CHECK-NEXT: qvfmul 3, 0, 0
|
||||
; CHECK-NEXT: qvfnmsub 2, 4, 3, 2
|
||||
; CHECK-NEXT: qvfmul 0, 0, 2
|
||||
; CHECK-NEXT: qvlfdx 2, 0, 3
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI12_2@toc@ha
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI12_2@toc@l
|
||||
; CHECK-NEXT: qvlfdx 3, 0, 3
|
||||
; CHECK-NEXT: qvfmul 0, 0, 1
|
||||
; CHECK-NEXT: qvfabs 1, 1
|
||||
; CHECK-NEXT: qvfcmplt 1, 1, 2
|
||||
; CHECK-NEXT: qvfsel 1, 1, 3, 0
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%r = call reassoc ninf afn <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
|
||||
ret <4 x double> %r
|
||||
}
|
||||
|
||||
; sqrt of <4 x double> %a with afn, reassoc and ninf, under attribute group #1
; (declared at the end of this file with
; "denormal-fp-math"="preserve-sign,preserve-sign"). The expected code uses the
; same qvfrsqrte-based sequence as the #0 variant but ends with
; qvfcmpeq + qvfsel against one loaded constant and has no qvfabs.
define <4 x double> @foo3_fmf_denorm_off(<4 x double> %a) #1 {
|
||||
; CHECK-LABEL: foo3_fmf_denorm_off:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI13_0@toc@ha
|
||||
; CHECK-NEXT: qvfrsqrte 0, 1
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI13_0@toc@l
|
||||
; CHECK-NEXT: qvlfdx 2, 0, 3
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI13_1@toc@ha
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI13_1@toc@l
|
||||
; CHECK-NEXT: qvfmul 3, 0, 0
|
||||
; CHECK-NEXT: qvfmsub 4, 1, 2, 1
|
||||
; CHECK-NEXT: qvfnmsub 3, 4, 3, 2
|
||||
; CHECK-NEXT: qvfmul 0, 0, 3
|
||||
; CHECK-NEXT: qvfmul 3, 0, 0
|
||||
; CHECK-NEXT: qvfnmsub 2, 4, 3, 2
|
||||
; CHECK-NEXT: qvfmul 0, 0, 2
|
||||
; CHECK-NEXT: qvlfdx 2, 0, 3
|
||||
; CHECK-NEXT: qvfmul 0, 0, 1
|
||||
; CHECK-NEXT: qvfcmpeq 1, 1, 2
|
||||
; CHECK-NEXT: qvfsel 1, 1, 2, 0
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%r = call afn reassoc ninf <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
|
||||
ret <4 x double> %r
|
||||
}
|
||||
|
||||
; sqrt of <4 x double> %a with no fast-math flags, under attribute group #0
; ("denormal-fp-math"="ieee,ieee"). The expected code is scalarized: qvesplati
; lane extraction, four fsqrt, and a qvgpci/qvfperm rebuild into register 1.
define <4 x double> @foo3_safe_denorm_on(<4 x double> %a) #0 {
|
||||
; CHECK-LABEL: foo3_safe_denorm_on:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: qvesplati 2, 1, 3
|
||||
; CHECK-NEXT: qvesplati 3, 1, 2
|
||||
; CHECK-NEXT: fsqrt 4, 1
|
||||
; CHECK-NEXT: qvesplati 1, 1, 1
|
||||
; CHECK-NEXT: fsqrt 2, 2
|
||||
; CHECK-NEXT: fsqrt 3, 3
|
||||
; CHECK-NEXT: fsqrt 1, 1
|
||||
; CHECK-NEXT: qvgpci 0, 275
|
||||
; CHECK-NEXT: qvfperm 2, 3, 2, 0
|
||||
; CHECK-NEXT: qvfperm 0, 4, 1, 0
|
||||
; CHECK-NEXT: qvgpci 1, 101
|
||||
; CHECK-NEXT: qvfperm 1, 0, 2, 1
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
|
||||
ret <4 x double> %r
|
||||
}
|
||||
|
||||
; sqrt of <4 x double> %a with no fast-math flags, under attribute group #1
; ("denormal-fp-math"="preserve-sign,preserve-sign"). The expected instruction
; sequence is the same scalarized four-fsqrt form as the #0 variant.
define <4 x double> @foo3_safe_denorm_off(<4 x double> %a) #1 {
|
||||
; CHECK-LABEL: foo3_safe_denorm_off:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: qvesplati 2, 1, 3
|
||||
; CHECK-NEXT: qvesplati 3, 1, 2
|
||||
; CHECK-NEXT: fsqrt 4, 1
|
||||
; CHECK-NEXT: qvesplati 1, 1, 1
|
||||
; CHECK-NEXT: fsqrt 2, 2
|
||||
; CHECK-NEXT: fsqrt 3, 3
|
||||
; CHECK-NEXT: fsqrt 1, 1
|
||||
; CHECK-NEXT: qvgpci 0, 275
|
||||
; CHECK-NEXT: qvfperm 2, 3, 2, 0
|
||||
; CHECK-NEXT: qvfperm 0, 4, 1, 0
|
||||
; CHECK-NEXT: qvgpci 1, 101
|
||||
; CHECK-NEXT: qvfperm 1, 0, 2, 1
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
|
||||
ret <4 x double> %r
|
||||
}
|
||||
|
||||
; sqrt of <4 x float> %a with reassoc, afn, ninf and nsz, under attribute
; group #0 ("denormal-fp-math"="ieee,ieee"). The expected code starts from
; qvfrsqrtes, applies qvfmuls/qvfmsubs/qvfnmsubs steps, multiplies by the
; input, and selects the final value with qvfabs + qvfcmplt + qvfsel using two
; further constants loaded by qvlfsx.
define <4 x float> @goo3_fmf_denorm_on(<4 x float> %a) #0 {
|
||||
; CHECK-LABEL: goo3_fmf_denorm_on:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI16_1@toc@ha
|
||||
; CHECK-NEXT: qvfrsqrtes 2, 1
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI16_1@toc@l
|
||||
; CHECK-NEXT: qvlfsx 0, 0, 3
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI16_0@toc@ha
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI16_0@toc@l
|
||||
; CHECK-NEXT: qvfmuls 4, 2, 2
|
||||
; CHECK-NEXT: qvfmsubs 3, 1, 0, 1
|
||||
; CHECK-NEXT: qvfnmsubs 0, 3, 4, 0
|
||||
; CHECK-NEXT: qvlfsx 3, 0, 3
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI16_2@toc@ha
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI16_2@toc@l
|
||||
; CHECK-NEXT: qvlfsx 4, 0, 3
|
||||
; CHECK-NEXT: qvfmuls 0, 2, 0
|
||||
; CHECK-NEXT: qvfabs 2, 1
|
||||
; CHECK-NEXT: qvfmuls 0, 0, 1
|
||||
; CHECK-NEXT: qvfcmplt 1, 2, 3
|
||||
; CHECK-NEXT: qvfsel 1, 1, 4, 0
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%r = call reassoc afn ninf nsz <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
; sqrt of <4 x float> %a with reassoc, ninf, afn and nsz, under attribute
; group #1 ("denormal-fp-math"="preserve-sign,preserve-sign"). The expected
; code uses the qvfrsqrtes-based sequence and ends with qvfcmpeq + qvfsel
; against one loaded constant; no qvfabs is expected.
define <4 x float> @goo3_fmf_denorm_off(<4 x float> %a) #1 {
|
||||
; CHECK-LABEL: goo3_fmf_denorm_off:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI17_1@toc@ha
|
||||
; CHECK-NEXT: qvfrsqrtes 2, 1
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI17_1@toc@l
|
||||
; CHECK-NEXT: qvlfsx 0, 0, 3
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI17_0@toc@ha
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI17_0@toc@l
|
||||
; CHECK-NEXT: qvfmuls 4, 2, 2
|
||||
; CHECK-NEXT: qvfmsubs 3, 1, 0, 1
|
||||
; CHECK-NEXT: qvfnmsubs 0, 3, 4, 0
|
||||
; CHECK-NEXT: qvlfsx 3, 0, 3
|
||||
; CHECK-NEXT: qvfmuls 0, 2, 0
|
||||
; CHECK-NEXT: qvfmuls 0, 0, 1
|
||||
; CHECK-NEXT: qvfcmpeq 1, 1, 3
|
||||
; CHECK-NEXT: qvfsel 1, 1, 3, 0
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%r = call reassoc ninf afn nsz <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
; sqrt of <4 x float> %a with no fast-math flags. The expected code is
; scalarized: qvesplati lane extraction, four fsqrts, and a qvgpci/qvfperm
; rebuild into register 1.
define <4 x float> @goo3_safe(<4 x float> %a) nounwind {
|
||||
; CHECK-LABEL: goo3_safe:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: qvesplati 2, 1, 3
|
||||
; CHECK-NEXT: qvesplati 3, 1, 2
|
||||
; CHECK-NEXT: fsqrts 4, 1
|
||||
; CHECK-NEXT: qvesplati 1, 1, 1
|
||||
; CHECK-NEXT: fsqrts 2, 2
|
||||
; CHECK-NEXT: fsqrts 3, 3
|
||||
; CHECK-NEXT: fsqrts 1, 1
|
||||
; CHECK-NEXT: qvgpci 0, 275
|
||||
; CHECK-NEXT: qvfperm 2, 3, 2, 0
|
||||
; CHECK-NEXT: qvfperm 0, 4, 1, 0
|
||||
; CHECK-NEXT: qvgpci 1, 101
|
||||
; CHECK-NEXT: qvfperm 1, 0, 2, 1
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "denormal-fp-math"="ieee,ieee" }
|
||||
attributes #1 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
|
|
@ -1,109 +0,0 @@
|
|||
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
; Applies llvm.floor to a <4 x float>. Both llc configurations of this file
; (the default one and the one adding -enable-unsafe-fp-math) expect the call
; to become `qvfrim 1, 1`.
define <4 x float> @test1(<4 x float> %x) nounwind {
|
||||
%call = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %x) nounwind readnone
|
||||
ret <4 x float> %call
|
||||
|
||||
; CHECK: test1:
|
||||
; CHECK: qvfrim 1, 1
|
||||
|
||||
; CHECK-FM: test1:
|
||||
; CHECK-FM: qvfrim 1, 1
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.floor.v4f32(<4 x float>) nounwind readnone
|
||||
|
||||
; Applies llvm.floor to a <4 x double>. Both llc configurations of this file
; expect the call to become `qvfrim 1, 1`.
define <4 x double> @test2(<4 x double> %x) nounwind {
|
||||
%call = tail call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
|
||||
ret <4 x double> %call
|
||||
|
||||
; CHECK: test2:
|
||||
; CHECK: qvfrim 1, 1
|
||||
|
||||
; CHECK-FM: test2:
|
||||
; CHECK-FM: qvfrim 1, 1
|
||||
}
|
||||
|
||||
declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
|
||||
|
||||
; Applies llvm.nearbyint to a <4 x float>. This is a negative test: both llc
; configurations of this file require that no qvfrin instruction is emitted.
define <4 x float> @test3(<4 x float> %x) nounwind {
|
||||
%call = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %x) nounwind readnone
|
||||
ret <4 x float> %call
|
||||
|
||||
; CHECK: test3:
|
||||
; CHECK-NOT: qvfrin
|
||||
|
||||
; CHECK-FM: test3:
|
||||
; CHECK-FM-NOT: qvfrin
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readnone
|
||||
|
||||
; Applies llvm.nearbyint to a <4 x double>. This is a negative test: both llc
; configurations of this file require that no qvfrin instruction is emitted.
define <4 x double> @test4(<4 x double> %x) nounwind {
|
||||
%call = tail call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %x) nounwind readnone
|
||||
ret <4 x double> %call
|
||||
|
||||
; CHECK: test4:
|
||||
; CHECK-NOT: qvfrin
|
||||
|
||||
; CHECK-FM: test4:
|
||||
; CHECK-FM-NOT: qvfrin
|
||||
}
|
||||
|
||||
declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) nounwind readnone
|
||||
|
||||
; Applies llvm.ceil to a <4 x float>. Both llc configurations of this file
; expect the call to become `qvfrip 1, 1`.
define <4 x float> @test5(<4 x float> %x) nounwind {
|
||||
%call = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone
|
||||
ret <4 x float> %call
|
||||
|
||||
; CHECK: test5:
|
||||
; CHECK: qvfrip 1, 1
|
||||
|
||||
; CHECK-FM: test5:
|
||||
; CHECK-FM: qvfrip 1, 1
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
|
||||
|
||||
; Applies llvm.ceil to a <4 x double>. Both llc configurations of this file
; expect the call to become `qvfrip 1, 1`.
define <4 x double> @test6(<4 x double> %x) nounwind {
|
||||
%call = tail call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
|
||||
ret <4 x double> %call
|
||||
|
||||
; CHECK: test6:
|
||||
; CHECK: qvfrip 1, 1
|
||||
|
||||
; CHECK-FM: test6:
|
||||
; CHECK-FM: qvfrip 1, 1
|
||||
}
|
||||
|
||||
declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
|
||||
|
||||
; Applies llvm.trunc to a <4 x float>. Both llc configurations of this file
; expect the call to become `qvfriz 1, 1`.
define <4 x float> @test9(<4 x float> %x) nounwind {
|
||||
%call = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone
|
||||
ret <4 x float> %call
|
||||
|
||||
; CHECK: test9:
|
||||
; CHECK: qvfriz 1, 1
|
||||
|
||||
; CHECK-FM: test9:
|
||||
; CHECK-FM: qvfriz 1, 1
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone
|
||||
|
||||
; Applies llvm.trunc to a <4 x double>. Both llc configurations of this file
; expect the call to become `qvfriz 1, 1`.
define <4 x double> @test10(<4 x double> %x) nounwind {
|
||||
%call = tail call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone
|
||||
ret <4 x double> %call
|
||||
|
||||
; CHECK: test10:
|
||||
; CHECK: qvfriz 1, 1
|
||||
|
||||
; CHECK-FM: test10:
|
||||
; CHECK-FM: qvfriz 1, 1
|
||||
}
|
||||
|
||||
declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
|
||||
; Loads a <4 x float> through %p with only 4-byte alignment and returns it.
; The expectations that follow this function look for two qvlfsx loads (one
; using an offset register set to 15), a qvlpclsx, and a qvfperm that combines
; them into register 1.
define <4 x float> @foo(<4 x float>* %p) {
|
||||
entry:
|
||||
%v = load <4 x float>, <4 x float>* %p, align 4
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; CHECK: @foo
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 15
|
||||
; CHECK-DAG: qvlfsx [[REG4:[0-9]+]], 0, 3
|
||||
; CHECK-DAG: qvlfsx [[REG2:[0-9]+]], 3, [[REG1]]
|
||||
; CHECK-DAG: qvlpclsx [[REG3:[0-9]+]], 0, 3
|
||||
; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]]
|
||||
; CHECK: blr
|
||||
|
||||
; Loads a <4 x float> through %p with 16-byte alignment and returns it.
; The expectations that follow this function only require a qvlfsx.
define <4 x float> @bar(<4 x float>* %p) {
|
||||
entry:
|
||||
%v = load <4 x float>, <4 x float>* %p, align 16
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; CHECK: @bar
|
||||
; CHECK: qvlfsx
|
||||
|
|
@ -1,143 +0,0 @@
|
|||
; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
|
||||
@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16
|
||||
|
||||
; Selects per lane between <4 x float> %a and %b using the <4 x i1> argument
; %c. Expected code: a single `qvfsel 1, 3, 1, 2` followed by blr.
define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x i1> %c) nounwind readnone {
|
||||
entry:
|
||||
%r = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
|
||||
ret <4 x float> %r
|
||||
|
||||
; CHECK-LABEL: @test1
|
||||
; CHECK: qvfsel 1, 3, 1, 2
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Builds a <4 x i1> mask from four scalar i1 arguments and uses it to select
; per lane between <4 x float> %a and %b. The expectations look for a stw, a
; qvlfiwzx load converted with qvfcfidu, a qvfcmpeq against a vector loaded by
; qvlfdx, and a qvfsel driven by the comparison result.
; NOTE(review): the stw/qvlfiwzx pair presumably moves the mask bits to the
; vector unit through memory - confirm.
define <4 x float> @test2(<4 x float> %a, <4 x float> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone {
|
||||
entry:
|
||||
%v = insertelement <4 x i1> undef, i1 %c1, i32 0
|
||||
%v2 = insertelement <4 x i1> %v, i1 %c2, i32 1
|
||||
%v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2
|
||||
%v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3
|
||||
%r = select <4 x i1> %v4, <4 x float> %a, <4 x float> %b
|
||||
ret <4 x float> %r
|
||||
|
||||
; CHECK-LABEL: @test2
|
||||
; CHECK: stw
|
||||
; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
|
||||
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
|
||||
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
|
||||
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
|
||||
; CHECK: qvfsel 1, [[REG4]], 1, 2
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; ANDs the <4 x i1> argument with the constant mask <0, undef, 1, 1>.
; Only the label and the qvlfsx line below carry a check prefix.
; NOTE(review): the "qvflogical" and "blr" lines have no check prefix, so
; FileCheck does not verify them - confirm whether that is intentional.
define <4 x i1> @test3(<4 x i1> %a) nounwind readnone {
|
||||
entry:
|
||||
%v = and <4 x i1> %a, <i1 0, i1 undef, i1 1, i1 1>
|
||||
ret <4 x i1> %v
|
||||
|
||||
; CHECK-LABEL: @test3
|
||||
; CHECK: qvlfsx [[REG:[0-9]+]],
|
||||
; qvflogical 1, 1, [[REG]], 1
|
||||
; blr
|
||||
}
|
||||
|
||||
; Loads a <4 x i1> through %t (16-byte alignment) and ANDs it with the
; argument %a. The expectations look for an lbz and stw, a qvlfiwzx load
; converted with qvfcfidu, a qvfcmpeq against a vector loaded by qvlfdx, and a
; final qvfand with register 1.
define <4 x i1> @test4(<4 x i1> %a, <4 x i1>* %t) nounwind {
|
||||
entry:
|
||||
%q = load <4 x i1>, <4 x i1>* %t, align 16
|
||||
%v = and <4 x i1> %a, %q
|
||||
ret <4 x i1> %v
|
||||
|
||||
; CHECK-LABEL: @test4
|
||||
; CHECK-DAG: lbz
|
||||
; CHECK-DAG: qvlfdx [[REG1:[0-9]+]],
|
||||
; CHECK-DAG: stw
|
||||
; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]],
|
||||
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]]
|
||||
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]]
|
||||
; CHECK: qvfand 1, 1, [[REG4]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Stores the <4 x i1> argument to the global @R. The expectations look for a
; qvlfdx constant load, a qvfmadd of register 1 with that constant, a qvfctiwu
; conversion, a qvstfiwx store of the converted value, then lwz and stb.
define void @test5(<4 x i1> %a) nounwind {
|
||||
entry:
|
||||
store <4 x i1> %a, <4 x i1>* @R
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test5
|
||||
; CHECK: qvlfdx [[REG1:[0-9]+]],
|
||||
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
|
||||
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
|
||||
; CHECK: qvstfiwx [[REG3]],
|
||||
; CHECK: lwz
|
||||
; CHECK: stb
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Extracts lane 2 of the <4 x i1> argument and returns it as a scalar i1. The
; expectations look for the qvlfdx / qvfmadd / qvfctiwu / qvstfiwx sequence
; followed by an lwz.
define i1 @test6(<4 x i1> %a) nounwind {
|
||||
entry:
|
||||
%r = extractelement <4 x i1> %a, i32 2
|
||||
ret i1 %r
|
||||
|
||||
; CHECK-LABEL: @test6
|
||||
; CHECK: qvlfdx [[REG1:[0-9]+]],
|
||||
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
|
||||
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
|
||||
; CHECK: qvstfiwx [[REG3]],
|
||||
; CHECK: lwz
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Extracts lanes 2 and 3 of the <4 x i1> argument and returns their logical
; AND. The expectations look for the qvlfdx / qvfmadd / qvfctiwu conversion,
; two qvstfiwx stores of the same converted register (the existing FIXME below
; calls out that duplicate store), two lwz loads, and a scalar `and` into
; register 3.
define i1 @test7(<4 x i1> %a) nounwind {
|
||||
entry:
|
||||
%r = extractelement <4 x i1> %a, i32 2
|
||||
%s = extractelement <4 x i1> %a, i32 3
|
||||
%q = and i1 %r, %s
|
||||
ret i1 %q
|
||||
|
||||
; CHECK-LABEL: @test7
|
||||
; CHECK: qvlfdx [[REG1:[0-9]+]],
|
||||
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
|
||||
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
|
||||
; CHECK: qvstfiwx [[REG3]],
|
||||
; CHECK-DAG: lwz [[REG4:[0-9]+]],
|
||||
; FIXME: We're storing the vector twice, and that's silly.
|
||||
; CHECK-DAG: qvstfiwx [[REG3]],
|
||||
; CHECK: lwz [[REG5:[0-9]+]],
|
||||
; CHECK: and 3,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Extracts lane 2 of a three-element <3 x i1> argument and returns it. The
; expectations are the same qvlfdx / qvfmadd / qvfctiwu / qvstfiwx / lwz
; sequence used for the four-element extract case.
define i1 @test8(<3 x i1> %a) nounwind {
|
||||
entry:
|
||||
%r = extractelement <3 x i1> %a, i32 2
|
||||
ret i1 %r
|
||||
|
||||
; CHECK-LABEL: @test8
|
||||
; CHECK: qvlfdx [[REG1:[0-9]+]],
|
||||
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
|
||||
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
|
||||
; CHECK: qvstfiwx [[REG3]],
|
||||
; CHECK: lwz
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Builds a <3 x i1> mask from three scalar i1 arguments and uses it to select
; per lane between <3 x float> %a and %b. The expectations match the
; four-element mask case: stw, qvlfiwzx + qvfcfidu, qvfcmpeq against a vector
; loaded by qvlfdx, then qvfsel into register 1.
define <3 x float> @test9(<3 x float> %a, <3 x float> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone {
|
||||
entry:
|
||||
%v = insertelement <3 x i1> undef, i1 %c1, i32 0
|
||||
%v2 = insertelement <3 x i1> %v, i1 %c2, i32 1
|
||||
%v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2
|
||||
%r = select <3 x i1> %v3, <3 x float> %a, <3 x float> %b
|
||||
ret <3 x float> %r
|
||||
|
||||
; CHECK-LABEL: @test9
|
||||
; CHECK: stw
|
||||
; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
|
||||
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
|
||||
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
|
||||
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
|
||||
; CHECK: qvfsel 1, [[REG4]], 1, 2
|
||||
; CHECK: blr
|
||||
}
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
|
||||
define void @foo(<4 x float> %v, <4 x float>* %p) {
|
||||
entry:
|
||||
store <4 x float> %v, <4 x float>* %p, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @foo
|
||||
; CHECK: stfs
|
||||
; CHECK: stfs
|
||||
; CHECK: stfs
|
||||
; CHECK: stfs
|
||||
; CHECK: blr
|
||||
|
||||
define void @bar(<4 x float> %v, <4 x float>* %p) {
|
||||
entry:
|
||||
store <4 x float> %v, <4 x float>* %p, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @bar
|
||||
; CHECK: qvstfsx
|
||||
|
|
@ -1,151 +0,0 @@
|
|||
; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
|
||||
@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16
|
||||
|
||||
define <4 x double> @test1(<4 x double> %a, <4 x double> %b, <4 x i1> %c) nounwind readnone {
|
||||
entry:
|
||||
%r = select <4 x i1> %c, <4 x double> %a, <4 x double> %b
|
||||
ret <4 x double> %r
|
||||
|
||||
; CHECK-LABEL: @test1
|
||||
; CHECK: qvfsel 1, 3, 1, 2
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <4 x double> @test2(<4 x double> %a, <4 x double> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone {
|
||||
entry:
|
||||
%v = insertelement <4 x i1> undef, i1 %c1, i32 0
|
||||
%v2 = insertelement <4 x i1> %v, i1 %c2, i32 1
|
||||
%v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2
|
||||
%v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3
|
||||
%r = select <4 x i1> %v4, <4 x double> %a, <4 x double> %b
|
||||
ret <4 x double> %r
|
||||
|
||||
; CHECK-LABEL: @test2
|
||||
|
||||
; FIXME: This load/store sequence is unnecessary.
|
||||
; CHECK-DAG: lbz
|
||||
; CHECK-DAG: stw
|
||||
|
||||
; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
|
||||
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
|
||||
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
|
||||
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
|
||||
; CHECK: qvfsel 1, [[REG4]], 1, 2
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <4 x i1> @test3(<4 x i1> %a) nounwind readnone {
|
||||
entry:
|
||||
%v = and <4 x i1> %a, <i1 0, i1 undef, i1 1, i1 1>
|
||||
ret <4 x i1> %v
|
||||
|
||||
; CHECK-LABEL: @test3
|
||||
; CHECK: qvlfsx [[REG:[0-9]+]],
|
||||
; qvflogical 1, 1, [[REG]], 1
|
||||
; blr
|
||||
}
|
||||
|
||||
define <4 x i1> @test4(<4 x i1> %a, <4 x i1>* %t) nounwind {
|
||||
entry:
|
||||
%q = load <4 x i1>, <4 x i1>* %t, align 16
|
||||
%v = and <4 x i1> %a, %q
|
||||
ret <4 x i1> %v
|
||||
|
||||
; CHECK-LABEL: @test4
|
||||
; CHECK-DAG: lbz
|
||||
; CHECK-DAG: qvlfdx [[REG1:[0-9]+]],
|
||||
; CHECK-DAG: stw
|
||||
; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]],
|
||||
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]]
|
||||
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]]
|
||||
; CHECK: qvfand 1, 1, [[REG4]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @test5(<4 x i1> %a) nounwind {
|
||||
entry:
|
||||
store <4 x i1> %a, <4 x i1>* @R
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test5
|
||||
; CHECK: qvlfdx [[REG1:[0-9]+]],
|
||||
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
|
||||
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
|
||||
; CHECK: qvstfiwx [[REG3]],
|
||||
; CHECK: lwz
|
||||
; CHECK: stb
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define i1 @test6(<4 x i1> %a) nounwind {
|
||||
entry:
|
||||
%r = extractelement <4 x i1> %a, i32 2
|
||||
ret i1 %r
|
||||
|
||||
; CHECK-LABEL: @test6
|
||||
; CHECK: qvlfdx [[REG1:[0-9]+]],
|
||||
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
|
||||
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
|
||||
; CHECK: qvstfiwx [[REG3]],
|
||||
; CHECK: lwz
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define i1 @test7(<4 x i1> %a) nounwind {
|
||||
entry:
|
||||
%r = extractelement <4 x i1> %a, i32 2
|
||||
%s = extractelement <4 x i1> %a, i32 3
|
||||
%q = and i1 %r, %s
|
||||
ret i1 %q
|
||||
|
||||
; CHECK-LABEL: @test7
|
||||
; CHECK: qvlfdx [[REG1:[0-9]+]],
|
||||
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
|
||||
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
|
||||
; CHECK: qvstfiwx [[REG3]],
|
||||
; CHECK-DAG: lwz [[REG4:[0-9]+]],
|
||||
; FIXME: We're storing the vector twice, and that's silly.
|
||||
; CHECK-DAG: qvstfiwx [[REG3]],
|
||||
; CHECK-DAG: lwz [[REG5:[0-9]+]],
|
||||
; CHECK: and 3,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define i1 @test8(<3 x i1> %a) nounwind {
|
||||
entry:
|
||||
%r = extractelement <3 x i1> %a, i32 2
|
||||
ret i1 %r
|
||||
|
||||
; CHECK-LABEL: @test8
|
||||
; CHECK: qvlfdx [[REG1:[0-9]+]],
|
||||
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
|
||||
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
|
||||
; CHECK: qvstfiwx [[REG3]],
|
||||
; CHECK: lwz
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <3 x double> @test9(<3 x double> %a, <3 x double> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone {
|
||||
entry:
|
||||
%v = insertelement <3 x i1> undef, i1 %c1, i32 0
|
||||
%v2 = insertelement <3 x i1> %v, i1 %c2, i32 1
|
||||
%v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2
|
||||
%r = select <3 x i1> %v3, <3 x double> %a, <3 x double> %b
|
||||
ret <3 x double> %r
|
||||
|
||||
; CHECK-LABEL: @test9
|
||||
|
||||
; FIXME: This load/store sequence is unnecessary.
|
||||
; CHECK-DAG: lbz
|
||||
; CHECK-DAG: stw
|
||||
|
||||
; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
|
||||
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
|
||||
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
|
||||
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
|
||||
; CHECK: qvfsel 1, [[REG4]], 1, 2
|
||||
; CHECK: blr
|
||||
}
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
; RUN: llc -verify-machineinstrs -mcpu=a2q < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define void @gsl_sf_legendre_Pl_deriv_array(<4 x i32> %inp1, <4 x double> %inp2) #0 {
|
||||
entry:
|
||||
br label %vector.body198
|
||||
|
||||
vector.body198: ; preds = %vector.body198, %for.body46.lr.ph
|
||||
%0 = icmp ne <4 x i32> %inp1, zeroinitializer
|
||||
%1 = select <4 x i1> %0, <4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double> <double -5.000000e-01, double -5.000000e-01, double -5.000000e-01, double -5.000000e-01>
|
||||
%2 = fmul <4 x double> %inp2, %1
|
||||
%3 = fmul <4 x double> %inp2, %2
|
||||
%4 = fmul <4 x double> %3, %inp2
|
||||
store <4 x double> %4, <4 x double>* undef, align 8
|
||||
br label %return
|
||||
|
||||
; CHECK-LABEL: @gsl_sf_legendre_Pl_deriv_array
|
||||
; CHECK: qvlfiwzx
|
||||
; CHECK: qvfcfidu
|
||||
; CHECK: qvfcmpeq
|
||||
; CHECK: qvfsel
|
||||
; CHECK: qvfmul
|
||||
|
||||
return: ; preds = %if.else.i
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
|
||||
define void @foo(<4 x double> %v, <4 x double>* %p) {
|
||||
entry:
|
||||
store <4 x double> %v, <4 x double>* %p, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @foo
|
||||
; CHECK: stfd
|
||||
; CHECK: stfd
|
||||
; CHECK: stfd
|
||||
; CHECK: stfd
|
||||
; CHECK: blr
|
||||
|
||||
define void @bar(<4 x double> %v, <4 x double>* %p) {
|
||||
entry:
|
||||
store <4 x double> %v, <4 x double>* %p, align 32
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @bar
|
||||
; CHECK: qvstfdx
|
||||
|
|
@ -1,217 +0,0 @@
|
|||
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define void @foo(double* noalias nocapture %a, double* noalias nocapture readonly %b) #0 {
|
||||
entry:
|
||||
br label %vector.body
|
||||
|
||||
; CHECK-LABEL: @foo
|
||||
; Make sure that the offset constants we use are all even (only the last should be odd).
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1056
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1088
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1152
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1216
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1280
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1344
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1408
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1472
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1536
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1600
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1568
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1664
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1632
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1728
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1696
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1792
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1760
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1856
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1824
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1920
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1888
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1984
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1952
|
||||
; CHECK-DAG: li {{[0-9]+}}, 2016
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1024
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1120
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1184
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1248
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1312
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1376
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1440
|
||||
; CHECK-DAG: li {{[0-9]+}}, 1504
|
||||
; CHECK-DAG: li {{[0-9]+}}, 2047
|
||||
; CHECK: blr
|
||||
|
||||
vector.body: ; preds = %vector.body, %entry
|
||||
%index = phi i64 [ 0, %entry ], [ %index.next.15, %vector.body ]
|
||||
%0 = shl i64 %index, 1
|
||||
%1 = getelementptr inbounds double, double* %b, i64 %0
|
||||
%2 = bitcast double* %1 to <8 x double>*
|
||||
%wide.vec = load <8 x double>, <8 x double>* %2, align 8
|
||||
%strided.vec = shufflevector <8 x double> %wide.vec, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%3 = fadd <4 x double> %strided.vec, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
|
||||
%4 = getelementptr inbounds double, double* %a, i64 %index
|
||||
%5 = bitcast double* %4 to <4 x double>*
|
||||
store <4 x double> %3, <4 x double>* %5, align 8
|
||||
%index.next = or i64 %index, 4
|
||||
%6 = shl i64 %index.next, 1
|
||||
%7 = getelementptr inbounds double, double* %b, i64 %6
|
||||
%8 = bitcast double* %7 to <8 x double>*
|
||||
%wide.vec.1 = load <8 x double>, <8 x double>* %8, align 8
|
||||
%strided.vec.1 = shufflevector <8 x double> %wide.vec.1, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%9 = fadd <4 x double> %strided.vec.1, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
|
||||
%10 = getelementptr inbounds double, double* %a, i64 %index.next
|
||||
%11 = bitcast double* %10 to <4 x double>*
|
||||
store <4 x double> %9, <4 x double>* %11, align 8
|
||||
%index.next.1 = or i64 %index, 8
|
||||
%12 = shl i64 %index.next.1, 1
|
||||
%13 = getelementptr inbounds double, double* %b, i64 %12
|
||||
%14 = bitcast double* %13 to <8 x double>*
|
||||
%wide.vec.2 = load <8 x double>, <8 x double>* %14, align 8
|
||||
%strided.vec.2 = shufflevector <8 x double> %wide.vec.2, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%15 = fadd <4 x double> %strided.vec.2, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
|
||||
%16 = getelementptr inbounds double, double* %a, i64 %index.next.1
|
||||
%17 = bitcast double* %16 to <4 x double>*
|
||||
store <4 x double> %15, <4 x double>* %17, align 8
|
||||
%index.next.2 = or i64 %index, 12
|
||||
%18 = shl i64 %index.next.2, 1
|
||||
%19 = getelementptr inbounds double, double* %b, i64 %18
|
||||
%20 = bitcast double* %19 to <8 x double>*
|
||||
%wide.vec.3 = load <8 x double>, <8 x double>* %20, align 8
|
||||
%strided.vec.3 = shufflevector <8 x double> %wide.vec.3, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%21 = fadd <4 x double> %strided.vec.3, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
|
||||
%22 = getelementptr inbounds double, double* %a, i64 %index.next.2
|
||||
%23 = bitcast double* %22 to <4 x double>*
|
||||
store <4 x double> %21, <4 x double>* %23, align 8
|
||||
%index.next.3 = or i64 %index, 16
|
||||
%24 = shl i64 %index.next.3, 1
|
||||
%25 = getelementptr inbounds double, double* %b, i64 %24
|
||||
%26 = bitcast double* %25 to <8 x double>*
|
||||
%wide.vec.4 = load <8 x double>, <8 x double>* %26, align 8
|
||||
%strided.vec.4 = shufflevector <8 x double> %wide.vec.4, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%27 = fadd <4 x double> %strided.vec.4, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
|
||||
%28 = getelementptr inbounds double, double* %a, i64 %index.next.3
|
||||
%29 = bitcast double* %28 to <4 x double>*
|
||||
store <4 x double> %27, <4 x double>* %29, align 8
|
||||
%index.next.4 = or i64 %index, 20
|
||||
%30 = shl i64 %index.next.4, 1
|
||||
%31 = getelementptr inbounds double, double* %b, i64 %30
|
||||
%32 = bitcast double* %31 to <8 x double>*
|
||||
%wide.vec.5 = load <8 x double>, <8 x double>* %32, align 8
|
||||
%strided.vec.5 = shufflevector <8 x double> %wide.vec.5, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%33 = fadd <4 x double> %strided.vec.5, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
|
||||
%34 = getelementptr inbounds double, double* %a, i64 %index.next.4
|
||||
%35 = bitcast double* %34 to <4 x double>*
|
||||
store <4 x double> %33, <4 x double>* %35, align 8
|
||||
%index.next.5 = or i64 %index, 24
|
||||
%36 = shl i64 %index.next.5, 1
|
||||
%37 = getelementptr inbounds double, double* %b, i64 %36
|
||||
%38 = bitcast double* %37 to <8 x double>*
|
||||
%wide.vec.6 = load <8 x double>, <8 x double>* %38, align 8
|
||||
%strided.vec.6 = shufflevector <8 x double> %wide.vec.6, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%39 = fadd <4 x double> %strided.vec.6, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
|
||||
%40 = getelementptr inbounds double, double* %a, i64 %index.next.5
|
||||
%41 = bitcast double* %40 to <4 x double>*
|
||||
store <4 x double> %39, <4 x double>* %41, align 8
|
||||
%index.next.6 = or i64 %index, 28
|
||||
%42 = shl i64 %index.next.6, 1
|
||||
%43 = getelementptr inbounds double, double* %b, i64 %42
|
||||
%44 = bitcast double* %43 to <8 x double>*
|
||||
%wide.vec.7 = load <8 x double>, <8 x double>* %44, align 8
|
||||
%strided.vec.7 = shufflevector <8 x double> %wide.vec.7, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%45 = fadd <4 x double> %strided.vec.7, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
|
||||
%46 = getelementptr inbounds double, double* %a, i64 %index.next.6
|
||||
%47 = bitcast double* %46 to <4 x double>*
|
||||
store <4 x double> %45, <4 x double>* %47, align 8
|
||||
%index.next.7 = or i64 %index, 32
|
||||
%48 = shl i64 %index.next.7, 1
|
||||
%49 = getelementptr inbounds double, double* %b, i64 %48
|
||||
%50 = bitcast double* %49 to <8 x double>*
|
||||
%wide.vec.8 = load <8 x double>, <8 x double>* %50, align 8
|
||||
%strided.vec.8 = shufflevector <8 x double> %wide.vec.8, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%51 = fadd <4 x double> %strided.vec.8, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
|
||||
%52 = getelementptr inbounds double, double* %a, i64 %index.next.7
|
||||
%53 = bitcast double* %52 to <4 x double>*
|
||||
store <4 x double> %51, <4 x double>* %53, align 8
|
||||
%index.next.8 = or i64 %index, 36
|
||||
%54 = shl i64 %index.next.8, 1
|
||||
%55 = getelementptr inbounds double, double* %b, i64 %54
|
||||
%56 = bitcast double* %55 to <8 x double>*
|
||||
%wide.vec.9 = load <8 x double>, <8 x double>* %56, align 8
|
||||
%strided.vec.9 = shufflevector <8 x double> %wide.vec.9, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%57 = fadd <4 x double> %strided.vec.9, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
|
||||
%58 = getelementptr inbounds double, double* %a, i64 %index.next.8
|
||||
%59 = bitcast double* %58 to <4 x double>*
|
||||
store <4 x double> %57, <4 x double>* %59, align 8
|
||||
%index.next.9 = or i64 %index, 40
|
||||
%60 = shl i64 %index.next.9, 1
|
||||
%61 = getelementptr inbounds double, double* %b, i64 %60
|
||||
%62 = bitcast double* %61 to <8 x double>*
|
||||
%wide.vec.10 = load <8 x double>, <8 x double>* %62, align 8
|
||||
%strided.vec.10 = shufflevector <8 x double> %wide.vec.10, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%63 = fadd <4 x double> %strided.vec.10, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
|
||||
%64 = getelementptr inbounds double, double* %a, i64 %index.next.9
|
||||
%65 = bitcast double* %64 to <4 x double>*
|
||||
store <4 x double> %63, <4 x double>* %65, align 8
|
||||
%index.next.10 = or i64 %index, 44
|
||||
%66 = shl i64 %index.next.10, 1
|
||||
%67 = getelementptr inbounds double, double* %b, i64 %66
|
||||
%68 = bitcast double* %67 to <8 x double>*
|
||||
%wide.vec.11 = load <8 x double>, <8 x double>* %68, align 8
|
||||
%strided.vec.11 = shufflevector <8 x double> %wide.vec.11, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%69 = fadd <4 x double> %strided.vec.11, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
|
||||
%70 = getelementptr inbounds double, double* %a, i64 %index.next.10
|
||||
%71 = bitcast double* %70 to <4 x double>*
|
||||
store <4 x double> %69, <4 x double>* %71, align 8
|
||||
%index.next.11 = or i64 %index, 48
|
||||
%72 = shl i64 %index.next.11, 1
|
||||
%73 = getelementptr inbounds double, double* %b, i64 %72
|
||||
%74 = bitcast double* %73 to <8 x double>*
|
||||
%wide.vec.12 = load <8 x double>, <8 x double>* %74, align 8
|
||||
%strided.vec.12 = shufflevector <8 x double> %wide.vec.12, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%75 = fadd <4 x double> %strided.vec.12, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
|
||||
%76 = getelementptr inbounds double, double* %a, i64 %index.next.11
|
||||
%77 = bitcast double* %76 to <4 x double>*
|
||||
store <4 x double> %75, <4 x double>* %77, align 8
|
||||
%index.next.12 = or i64 %index, 52
|
||||
%78 = shl i64 %index.next.12, 1
|
||||
%79 = getelementptr inbounds double, double* %b, i64 %78
|
||||
%80 = bitcast double* %79 to <8 x double>*
|
||||
%wide.vec.13 = load <8 x double>, <8 x double>* %80, align 8
|
||||
%strided.vec.13 = shufflevector <8 x double> %wide.vec.13, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%81 = fadd <4 x double> %strided.vec.13, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
|
||||
%82 = getelementptr inbounds double, double* %a, i64 %index.next.12
|
||||
%83 = bitcast double* %82 to <4 x double>*
|
||||
store <4 x double> %81, <4 x double>* %83, align 8
|
||||
%index.next.13 = or i64 %index, 56
|
||||
%84 = shl i64 %index.next.13, 1
|
||||
%85 = getelementptr inbounds double, double* %b, i64 %84
|
||||
%86 = bitcast double* %85 to <8 x double>*
|
||||
%wide.vec.14 = load <8 x double>, <8 x double>* %86, align 8
|
||||
%strided.vec.14 = shufflevector <8 x double> %wide.vec.14, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%87 = fadd <4 x double> %strided.vec.14, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
|
||||
%88 = getelementptr inbounds double, double* %a, i64 %index.next.13
|
||||
%89 = bitcast double* %88 to <4 x double>*
|
||||
store <4 x double> %87, <4 x double>* %89, align 8
|
||||
%index.next.14 = or i64 %index, 60
|
||||
%90 = shl i64 %index.next.14, 1
|
||||
%91 = getelementptr inbounds double, double* %b, i64 %90
|
||||
%92 = bitcast double* %91 to <8 x double>*
|
||||
%wide.vec.15 = load <8 x double>, <8 x double>* %92, align 8
|
||||
%strided.vec.15 = shufflevector <8 x double> %wide.vec.15, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%93 = fadd <4 x double> %strided.vec.15, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
|
||||
%94 = getelementptr inbounds double, double* %a, i64 %index.next.14
|
||||
%95 = bitcast double* %94 to <4 x double>*
|
||||
store <4 x double> %93, <4 x double>* %95, align 8
|
||||
%index.next.15 = add nsw i64 %index, 64
|
||||
%96 = icmp eq i64 %index.next.15, 1600
|
||||
br i1 %96, label %for.cond.cleanup, label %vector.body
|
||||
|
||||
for.cond.cleanup: ; preds = %vector.body
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "target-cpu"="a2q" }
|
||||
|
|
@ -1,64 +0,0 @@
|
|||
; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
|
||||
define <4 x double> @foo(<4 x double>* %a) {
|
||||
entry:
|
||||
%r = load <4 x double>, <4 x double>* %a, align 32
|
||||
ret <4 x double> %r
|
||||
; CHECK: qvlfdx
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <4 x double> @bar(<4 x double>* %a) {
|
||||
entry:
|
||||
%r = load <4 x double>, <4 x double>* %a, align 8
|
||||
%b = getelementptr <4 x double>, <4 x double>* %a, i32 16
|
||||
%s = load <4 x double>, <4 x double>* %b, align 32
|
||||
%t = fadd <4 x double> %r, %s
|
||||
ret <4 x double> %t
|
||||
; CHECK: qvlpcldx
|
||||
; CHECK: qvlfdx
|
||||
; CHECK: qvfperm
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <4 x double> @bar1(<4 x double>* %a) {
|
||||
entry:
|
||||
%r = load <4 x double>, <4 x double>* %a, align 8
|
||||
%b = getelementptr <4 x double>, <4 x double>* %a, i32 16
|
||||
%s = load <4 x double>, <4 x double>* %b, align 8
|
||||
%t = fadd <4 x double> %r, %s
|
||||
ret <4 x double> %t
|
||||
}
|
||||
|
||||
define <4 x double> @bar2(<4 x double>* %a) {
|
||||
entry:
|
||||
%r = load <4 x double>, <4 x double>* %a, align 8
|
||||
%b = getelementptr <4 x double>, <4 x double>* %a, i32 1
|
||||
%s = load <4 x double>, <4 x double>* %b, align 32
|
||||
%t = fadd <4 x double> %r, %s
|
||||
ret <4 x double> %t
|
||||
}
|
||||
|
||||
define <4 x double> @bar3(<4 x double>* %a) {
|
||||
entry:
|
||||
%r = load <4 x double>, <4 x double>* %a, align 8
|
||||
%b = getelementptr <4 x double>, <4 x double>* %a, i32 1
|
||||
%s = load <4 x double>, <4 x double>* %b, align 8
|
||||
%t = fadd <4 x double> %r, %s
|
||||
ret <4 x double> %t
|
||||
}
|
||||
|
||||
define <4 x double> @bar4(<4 x double>* %a) {
|
||||
entry:
|
||||
%r = load <4 x double>, <4 x double>* %a, align 8
|
||||
%b = getelementptr <4 x double>, <4 x double>* %a, i32 1
|
||||
%s = load <4 x double>, <4 x double>* %b, align 8
|
||||
%c = getelementptr <4 x double>, <4 x double>* %b, i32 1
|
||||
%t = load <4 x double>, <4 x double>* %c, align 8
|
||||
%u = fadd <4 x double> %r, %s
|
||||
%v = fadd <4 x double> %u, %t
|
||||
ret <4 x double> %v
|
||||
}
|
||||
|
|
@ -1,6 +1,4 @@
|
|||
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-crbits < %s | FileCheck %s
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-bgq-linux"
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-crbits -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
|
||||
|
||||
define void @test() align 2 {
|
||||
entry:
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue