[PowerPC] Remove QPX/A2Q BGQ/BGP CNK support

Per RFC http://lists.llvm.org/pipermail/llvm-dev/2020-April/141295.html
no one is making use of QPX/A2Q/BGQ/BGP CNK anymore.

This patch remove the support of QPX/A2Q in llvm, BGQ/BGP in clang,
CNK support in openmp/polly.

Reviewed By: hfinkel

Differential Revision: https://reviews.llvm.org/D83915
This commit is contained in:
Jinsong Ji 2020-07-27 18:01:40 +00:00
parent fbe911ee75
commit adffce7153
135 changed files with 174 additions and 6525 deletions

View File

@ -46,8 +46,6 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasP8Crypto = true;
} else if (Feature == "+direct-move") {
HasDirectMove = true;
} else if (Feature == "+qpx") {
HasQPX = true;
} else if (Feature == "+htm") {
HasHTM = true;
} else if (Feature == "+float128") {
@ -99,7 +97,7 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts,
}
// ABI options.
if (ABI == "elfv1" || ABI == "elfv1-qpx")
if (ABI == "elfv1")
Builder.defineMacro("_CALL_ELF", "1");
if (ABI == "elfv2")
Builder.defineMacro("_CALL_ELF", "2");
@ -159,22 +157,11 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("_ARCH_PWR10");
if (ArchDefs & ArchDefineA2)
Builder.defineMacro("_ARCH_A2");
if (ArchDefs & ArchDefineA2q) {
Builder.defineMacro("_ARCH_A2Q");
Builder.defineMacro("_ARCH_QP");
}
if (ArchDefs & ArchDefineE500)
Builder.defineMacro("__NO_LWSYNC__");
if (ArchDefs & ArchDefineFuture)
Builder.defineMacro("_ARCH_PWR_FUTURE");
if (getTriple().getVendor() == llvm::Triple::BGQ) {
Builder.defineMacro("__bg__");
Builder.defineMacro("__THW_BLUEGENE__");
Builder.defineMacro("__bgq__");
Builder.defineMacro("__TOS_BGQ__");
}
if (HasAltivec) {
Builder.defineMacro("__VEC__", "10206");
Builder.defineMacro("__ALTIVEC__");
@ -277,7 +264,6 @@ bool PPCTargetInfo::initFeatureMap(
.Case("ppc64le", true)
.Default(false);
Features["qpx"] = (CPU == "a2q");
Features["power9-vector"] = (CPU == "pwr9");
Features["crypto"] = llvm::StringSwitch<bool>(CPU)
.Case("ppc64le", true)
@ -373,7 +359,6 @@ bool PPCTargetInfo::hasFeature(StringRef Feature) const {
.Case("power8-vector", HasP8Vector)
.Case("crypto", HasP8Crypto)
.Case("direct-move", HasDirectMove)
.Case("qpx", HasQPX)
.Case("htm", HasHTM)
.Case("bpermd", HasBPERMD)
.Case("extdiv", HasExtDiv)
@ -503,17 +488,17 @@ ArrayRef<TargetInfo::AddlRegName> PPCTargetInfo::getGCCAddlRegNames() const {
}
static constexpr llvm::StringLiteral ValidCPUNames[] = {
{"generic"}, {"440"}, {"450"}, {"601"}, {"602"},
{"603"}, {"603e"}, {"603ev"}, {"604"}, {"604e"},
{"620"}, {"630"}, {"g3"}, {"7400"}, {"g4"},
{"7450"}, {"g4+"}, {"750"}, {"8548"}, {"970"},
{"g5"}, {"a2"}, {"a2q"}, {"e500"}, {"e500mc"},
{"e5500"}, {"power3"}, {"pwr3"}, {"power4"}, {"pwr4"},
{"power5"}, {"pwr5"}, {"power5x"}, {"pwr5x"}, {"power6"},
{"pwr6"}, {"power6x"}, {"pwr6x"}, {"power7"}, {"pwr7"},
{"power8"}, {"pwr8"}, {"power9"}, {"pwr9"}, {"power10"},
{"pwr10"}, {"powerpc"}, {"ppc"}, {"powerpc64"}, {"ppc64"},
{"powerpc64le"}, {"ppc64le"}, {"future"}};
{"generic"}, {"440"}, {"450"}, {"601"}, {"602"},
{"603"}, {"603e"}, {"603ev"}, {"604"}, {"604e"},
{"620"}, {"630"}, {"g3"}, {"7400"}, {"g4"},
{"7450"}, {"g4+"}, {"750"}, {"8548"}, {"970"},
{"g5"}, {"a2"}, {"e500"}, {"e500mc"}, {"e5500"},
{"power3"}, {"pwr3"}, {"power4"}, {"pwr4"}, {"power5"},
{"pwr5"}, {"power5x"}, {"pwr5x"}, {"power6"}, {"pwr6"},
{"power6x"}, {"pwr6x"}, {"power7"}, {"pwr7"}, {"power8"},
{"pwr8"}, {"power9"}, {"pwr9"}, {"power10"}, {"pwr10"},
{"powerpc"}, {"ppc"}, {"powerpc64"}, {"ppc64"}, {"powerpc64le"},
{"ppc64le"}, {"future"}};
bool PPCTargetInfo::isValidCPUName(StringRef Name) const {
return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames);

View File

@ -46,7 +46,6 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
ArchDefinePwr10 = 1 << 14,
ArchDefineFuture = 1 << 15,
ArchDefineA2 = 1 << 16,
ArchDefineA2q = 1 << 17,
ArchDefineE500 = 1 << 18
} ArchDefineTypes;
@ -63,7 +62,6 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
bool HasP8Vector = false;
bool HasP8Crypto = false;
bool HasDirectMove = false;
bool HasQPX = false;
bool HasHTM = false;
bool HasBPERMD = false;
bool HasExtDiv = false;
@ -118,7 +116,6 @@ public:
.Case("970", ArchDefineName | ArchDefinePwr4 | ArchDefinePpcgr |
ArchDefinePpcsq)
.Case("a2", ArchDefineA2)
.Case("a2q", ArchDefineName | ArchDefineA2 | ArchDefineA2q)
.Cases("power3", "pwr3", ArchDefinePpcgr)
.Cases("power4", "pwr4",
ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq)

View File

@ -57,7 +57,6 @@ std::string ppc::getPPCTargetCPU(const ArgList &Args) {
.Case("970", "970")
.Case("G5", "g5")
.Case("a2", "a2")
.Case("a2q", "a2q")
.Case("e500", "e500")
.Case("e500mc", "e500mc")
.Case("e5500", "e5500")

View File

@ -1883,18 +1883,6 @@ void Clang::AddPPCTargetArgs(const ArgList &Args,
if (T.isOSBinFormatELF()) {
switch (getToolChain().getArch()) {
case llvm::Triple::ppc64: {
// When targeting a processor that supports QPX, or if QPX is
// specifically enabled, default to using the ABI that supports QPX (so
// long as it is not specifically disabled).
bool HasQPX = false;
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
HasQPX = A->getValue() == StringRef("a2q");
HasQPX = Args.hasFlag(options::OPT_mqpx, options::OPT_mno_qpx, HasQPX);
if (HasQPX) {
ABIName = "elfv1-qpx";
break;
}
if (T.isMusl() || (T.isOSFreeBSD() && T.getOSMajorVersion() >= 13))
ABIName = "elfv2";
else

View File

@ -167,12 +167,6 @@
// PPCPWR8: "-cc1"
// PPCPWR8: "-target-cpu" "pwr8"
// RUN: %clang -target powerpc64-unknown-linux-gnu \
// RUN: -### -S %s -mcpu=a2q 2>&1 | FileCheck -check-prefix=PPCA2Q %s
// PPCA2Q: clang
// PPCA2Q: "-cc1"
// PPCA2Q: "-target-cpu" "a2q"
// RUN: %clang -target powerpc64-unknown-linux-gnu \
// RUN: -### -S %s -mcpu=630 2>&1 | FileCheck -check-prefix=PPC630 %s
// PPC630: clang

View File

@ -5,14 +5,6 @@
// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mabi=elfv1 | FileCheck -check-prefix=CHECK-ELFv1 %s
// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mabi=elfv1-qpx | FileCheck -check-prefix=CHECK-ELFv1-QPX %s
// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mcpu=a2q | FileCheck -check-prefix=CHECK-ELFv1-QPX %s
// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mcpu=a2 -mqpx | FileCheck -check-prefix=CHECK-ELFv1-QPX %s
// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mcpu=a2q -mno-qpx | FileCheck -check-prefix=CHECK-ELFv1 %s
// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mabi=elfv2 | FileCheck -check-prefix=CHECK-ELFv2-BE %s
// RUN: %clang -target powerpc64le-unknown-linux-gnu %s -### -o %t.o 2>&1 \
@ -34,8 +26,6 @@
// CHECK-ELFv1: "-target-abi" "elfv1"
// CHECK-ELFv1-LE: "-mrelocation-model" "static"
// CHECK-ELFv1-LE: "-target-abi" "elfv1"
// CHECK-ELFv1-QPX: "-mrelocation-model" "static"
// CHECK-ELFv1-QPX: "-target-abi" "elfv1-qpx"
// CHECK-ELFv2: "-mrelocation-model" "static"
// CHECK-ELFv2: "-target-abi" "elfv2"
// CHECK-ELFv2-BE: "-mrelocation-model" "static"
@ -48,14 +38,6 @@
// RUN: %clang -fPIC -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mabi=elfv1 | FileCheck -check-prefix=CHECK-ELFv1-PIC %s
// RUN: %clang -fPIC -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mabi=elfv1-qpx | FileCheck -check-prefix=CHECK-ELFv1-QPX-PIC %s
// RUN: %clang -fPIC -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mcpu=a2q | FileCheck -check-prefix=CHECK-ELFv1-QPX-PIC %s
// RUN: %clang -fPIC -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mcpu=a2 -mqpx | FileCheck -check-prefix=CHECK-ELFv1-QPX-PIC %s
// RUN: %clang -fPIC -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mcpu=a2q -mno-qpx | FileCheck -check-prefix=CHECK-ELFv1-PIC %s
// RUN: %clang -fPIC -target powerpc64-unknown-linux-gnu %s -### -o %t.o 2>&1 \
// RUN: -mabi=elfv2 | FileCheck -check-prefix=CHECK-ELFv2-PIC %s
// RUN: %clang -fPIC -target powerpc64le-unknown-linux-gnu %s -### -o %t.o 2>&1 \
@ -69,8 +51,6 @@
// CHECK-ELFv1-PIC: "-mrelocation-model" "pic" "-pic-level" "2"
// CHECK-ELFv1-PIC: "-target-abi" "elfv1"
// CHECK-ELFv1-QPX-PIC: "-mrelocation-model" "pic" "-pic-level" "2"
// CHECK-ELFv1-QPX-PIC: "-target-abi" "elfv1-qpx"
// CHECK-ELFv2-PIC: "-mrelocation-model" "pic" "-pic-level" "2"
// CHECK-ELFv2-PIC: "-target-abi" "elfv2"

View File

@ -79,7 +79,7 @@
// PPC: error: unknown target CPU 'not-a-cpu'
// PPC: note: valid target CPU values are: generic, 440, 450, 601, 602, 603,
// PPC-SAME: 603e, 603ev, 604, 604e, 620, 630, g3, 7400, g4, 7450, g4+, 750,
// PPC-SAME: 8548, 970, g5, a2, a2q, e500, e500mc, e5500, power3, pwr3, power4,
// PPC-SAME: 8548, 970, g5, a2, e500, e500mc, e5500, power3, pwr3, power4,
// PPC-SAME: pwr4, power5, pwr5, power5x, pwr5x, power6, pwr6, power6x, pwr6x,
// PPC-SAME: power7, pwr7, power8, pwr8, power9, pwr9, power10, pwr10, powerpc, ppc, powerpc64,
// PPC-SAME: ppc64, powerpc64le, ppc64le, future

View File

@ -408,21 +408,6 @@
// PPC64LE:#define __ppc64__ 1
// PPC64LE:#define __ppc__ 1
//
// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu a2q -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCA2Q %s
//
// PPCA2Q:#define _ARCH_A2 1
// PPCA2Q:#define _ARCH_A2Q 1
// PPCA2Q:#define _ARCH_PPC 1
// PPCA2Q:#define _ARCH_PPC64 1
// PPCA2Q:#define _ARCH_QP 1
//
// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-bgq-linux -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCBGQ %s
//
// PPCBGQ:#define __THW_BLUEGENE__ 1
// PPCBGQ:#define __TOS_BGQ__ 1
// PPCBGQ:#define __bg__ 1
// PPCBGQ:#define __bgq__ 1
//
// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu 630 -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPC630 %s
//
// PPC630:#define _ARCH_630 1
@ -1069,7 +1054,6 @@
// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64-unknown-linux-gnu < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv1 %s
// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64-unknown-linux-gnu -target-abi elfv1 < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv1 %s
// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64-unknown-linux-gnu -target-abi elfv1-qpx < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv1 %s
// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64-unknown-linux-gnu -target-abi elfv2 < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv2 %s
// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64le-unknown-linux-gnu < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv2 %s
// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64le-unknown-linux-gnu -target-abi elfv1 < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv1 %s

View File

@ -4310,14 +4310,9 @@ PowerPC:
- ``r``: A 32 or 64-bit integer register.
- ``b``: A 32 or 64-bit integer register, excluding ``R0`` (that is:
``R1-R31``).
- ``f``: A 32 or 64-bit float register (``F0-F31``), or when QPX is enabled, a
128 or 256-bit QPX register (``Q0-Q31``; aliases the ``F`` registers).
- ``v``: For ``4 x f32`` or ``4 x f64`` types, when QPX is enabled, a
128 or 256-bit QPX register (``Q0-Q31``), otherwise a 128-bit
altivec vector register (``V0-V31``).
.. FIXME: is this a bug that v accepts QPX registers? I think this
is supposed to only use the altivec vector registers?
- ``f``: A 32 or 64-bit float register (``F0-F31``),
- ``v``: For ``4 x f32`` or ``4 x f64`` types, a 128-bit altivec vector
register (``V0-V31``).
- ``y``: Condition register (``CR0-CR7``).
- ``wc``: An individual CR bit in a CR register.

View File

@ -142,8 +142,6 @@ public:
Apple,
PC,
SCEI,
BGP,
BGQ,
Freescale,
IBM,
ImaginationTechnologies,
@ -179,7 +177,6 @@ public:
Minix,
RTEMS,
NaCl, // Native Client
CNK, // BG/P Compute-Node Kernel
AIX,
CUDA, // NVIDIA CUDA
NVCL, // NVIDIA OpenCL

View File

@ -1109,182 +1109,6 @@ def int_ppc_vsx_xxblendvd: GCCBuiltin<"__builtin_vsx_xxblendvd">,
[IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
// PowerPC QPX Intrinsics.
//
let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
/// PowerPC_QPX_Intrinsic - Base class for all QPX intrinsics.
class PowerPC_QPX_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
list<LLVMType> param_types,
list<IntrinsicProperty> properties>
: GCCBuiltin<!strconcat("__builtin_qpx_", GCCIntSuffix)>,
Intrinsic<ret_types, param_types, properties>;
}
//===----------------------------------------------------------------------===//
// PowerPC QPX Intrinsic Class Definitions.
//
/// PowerPC_QPX_FF_Intrinsic - A PowerPC intrinsic that takes one v4f64
/// vector and returns one. These intrinsics have no side effects.
class PowerPC_QPX_FF_Intrinsic<string GCCIntSuffix>
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
/// PowerPC_QPX_FFF_Intrinsic - A PowerPC intrinsic that takes two v4f64
/// vectors and returns one. These intrinsics have no side effects.
class PowerPC_QPX_FFF_Intrinsic<string GCCIntSuffix>
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty],
[IntrNoMem]>;
/// PowerPC_QPX_FFFF_Intrinsic - A PowerPC intrinsic that takes three v4f64
/// vectors and returns one. These intrinsics have no side effects.
class PowerPC_QPX_FFFF_Intrinsic<string GCCIntSuffix>
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
[IntrNoMem]>;
/// PowerPC_QPX_Load_Intrinsic - A PowerPC intrinsic that takes a pointer
/// and returns a v4f64.
class PowerPC_QPX_Load_Intrinsic<string GCCIntSuffix>
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
/// PowerPC_QPX_LoadPerm_Intrinsic - A PowerPC intrinsic that takes a pointer
/// and returns a v4f64 permutation.
class PowerPC_QPX_LoadPerm_Intrinsic<string GCCIntSuffix>
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
[llvm_v4f64_ty], [llvm_ptr_ty], [IntrNoMem]>;
/// PowerPC_QPX_Store_Intrinsic - A PowerPC intrinsic that takes a pointer
/// and stores a v4f64.
class PowerPC_QPX_Store_Intrinsic<string GCCIntSuffix>
: PowerPC_QPX_Intrinsic<GCCIntSuffix,
[], [llvm_v4f64_ty, llvm_ptr_ty],
[IntrWriteMem, IntrArgMemOnly]>;
//===----------------------------------------------------------------------===//
// PowerPC QPX Intrinsic Definitions.
let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// Add Instructions
def int_ppc_qpx_qvfadd : PowerPC_QPX_FFF_Intrinsic<"qvfadd">;
def int_ppc_qpx_qvfadds : PowerPC_QPX_FFF_Intrinsic<"qvfadds">;
def int_ppc_qpx_qvfsub : PowerPC_QPX_FFF_Intrinsic<"qvfsub">;
def int_ppc_qpx_qvfsubs : PowerPC_QPX_FFF_Intrinsic<"qvfsubs">;
// Estimate Instructions
def int_ppc_qpx_qvfre : PowerPC_QPX_FF_Intrinsic<"qvfre">;
def int_ppc_qpx_qvfres : PowerPC_QPX_FF_Intrinsic<"qvfres">;
def int_ppc_qpx_qvfrsqrte : PowerPC_QPX_FF_Intrinsic<"qvfrsqrte">;
def int_ppc_qpx_qvfrsqrtes : PowerPC_QPX_FF_Intrinsic<"qvfrsqrtes">;
// Multiply Instructions
def int_ppc_qpx_qvfmul : PowerPC_QPX_FFF_Intrinsic<"qvfmul">;
def int_ppc_qpx_qvfmuls : PowerPC_QPX_FFF_Intrinsic<"qvfmuls">;
def int_ppc_qpx_qvfxmul : PowerPC_QPX_FFF_Intrinsic<"qvfxmul">;
def int_ppc_qpx_qvfxmuls : PowerPC_QPX_FFF_Intrinsic<"qvfxmuls">;
// Multiply-add instructions
def int_ppc_qpx_qvfmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfmadd">;
def int_ppc_qpx_qvfmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfmadds">;
def int_ppc_qpx_qvfnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadd">;
def int_ppc_qpx_qvfnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadds">;
def int_ppc_qpx_qvfmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfmsub">;
def int_ppc_qpx_qvfmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfmsubs">;
def int_ppc_qpx_qvfnmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsub">;
def int_ppc_qpx_qvfnmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsubs">;
def int_ppc_qpx_qvfxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadd">;
def int_ppc_qpx_qvfxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadds">;
def int_ppc_qpx_qvfxxnpmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadd">;
def int_ppc_qpx_qvfxxnpmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadds">;
def int_ppc_qpx_qvfxxcpnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadd">;
def int_ppc_qpx_qvfxxcpnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadds">;
def int_ppc_qpx_qvfxxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadd">;
def int_ppc_qpx_qvfxxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadds">;
// Select Instruction
def int_ppc_qpx_qvfsel : PowerPC_QPX_FFFF_Intrinsic<"qvfsel">;
// Permute Instruction
def int_ppc_qpx_qvfperm : PowerPC_QPX_FFFF_Intrinsic<"qvfperm">;
// Convert and Round Instructions
def int_ppc_qpx_qvfctid : PowerPC_QPX_FF_Intrinsic<"qvfctid">;
def int_ppc_qpx_qvfctidu : PowerPC_QPX_FF_Intrinsic<"qvfctidu">;
def int_ppc_qpx_qvfctidz : PowerPC_QPX_FF_Intrinsic<"qvfctidz">;
def int_ppc_qpx_qvfctiduz : PowerPC_QPX_FF_Intrinsic<"qvfctiduz">;
def int_ppc_qpx_qvfctiw : PowerPC_QPX_FF_Intrinsic<"qvfctiw">;
def int_ppc_qpx_qvfctiwu : PowerPC_QPX_FF_Intrinsic<"qvfctiwu">;
def int_ppc_qpx_qvfctiwz : PowerPC_QPX_FF_Intrinsic<"qvfctiwz">;
def int_ppc_qpx_qvfctiwuz : PowerPC_QPX_FF_Intrinsic<"qvfctiwuz">;
def int_ppc_qpx_qvfcfid : PowerPC_QPX_FF_Intrinsic<"qvfcfid">;
def int_ppc_qpx_qvfcfidu : PowerPC_QPX_FF_Intrinsic<"qvfcfidu">;
def int_ppc_qpx_qvfcfids : PowerPC_QPX_FF_Intrinsic<"qvfcfids">;
def int_ppc_qpx_qvfcfidus : PowerPC_QPX_FF_Intrinsic<"qvfcfidus">;
def int_ppc_qpx_qvfrsp : PowerPC_QPX_FF_Intrinsic<"qvfrsp">;
def int_ppc_qpx_qvfriz : PowerPC_QPX_FF_Intrinsic<"qvfriz">;
def int_ppc_qpx_qvfrin : PowerPC_QPX_FF_Intrinsic<"qvfrin">;
def int_ppc_qpx_qvfrip : PowerPC_QPX_FF_Intrinsic<"qvfrip">;
def int_ppc_qpx_qvfrim : PowerPC_QPX_FF_Intrinsic<"qvfrim">;
// Move Instructions
def int_ppc_qpx_qvfneg : PowerPC_QPX_FF_Intrinsic<"qvfneg">;
def int_ppc_qpx_qvfabs : PowerPC_QPX_FF_Intrinsic<"qvfabs">;
def int_ppc_qpx_qvfnabs : PowerPC_QPX_FF_Intrinsic<"qvfnabs">;
def int_ppc_qpx_qvfcpsgn : PowerPC_QPX_FFF_Intrinsic<"qvfcpsgn">;
// Compare Instructions
def int_ppc_qpx_qvftstnan : PowerPC_QPX_FFF_Intrinsic<"qvftstnan">;
def int_ppc_qpx_qvfcmplt : PowerPC_QPX_FFF_Intrinsic<"qvfcmplt">;
def int_ppc_qpx_qvfcmpgt : PowerPC_QPX_FFF_Intrinsic<"qvfcmpgt">;
def int_ppc_qpx_qvfcmpeq : PowerPC_QPX_FFF_Intrinsic<"qvfcmpeq">;
// Load instructions
def int_ppc_qpx_qvlfd : PowerPC_QPX_Load_Intrinsic<"qvlfd">;
def int_ppc_qpx_qvlfda : PowerPC_QPX_Load_Intrinsic<"qvlfda">;
def int_ppc_qpx_qvlfs : PowerPC_QPX_Load_Intrinsic<"qvlfs">;
def int_ppc_qpx_qvlfsa : PowerPC_QPX_Load_Intrinsic<"qvlfsa">;
def int_ppc_qpx_qvlfcda : PowerPC_QPX_Load_Intrinsic<"qvlfcda">;
def int_ppc_qpx_qvlfcd : PowerPC_QPX_Load_Intrinsic<"qvlfcd">;
def int_ppc_qpx_qvlfcsa : PowerPC_QPX_Load_Intrinsic<"qvlfcsa">;
def int_ppc_qpx_qvlfcs : PowerPC_QPX_Load_Intrinsic<"qvlfcs">;
def int_ppc_qpx_qvlfiwaa : PowerPC_QPX_Load_Intrinsic<"qvlfiwaa">;
def int_ppc_qpx_qvlfiwa : PowerPC_QPX_Load_Intrinsic<"qvlfiwa">;
def int_ppc_qpx_qvlfiwza : PowerPC_QPX_Load_Intrinsic<"qvlfiwza">;
def int_ppc_qpx_qvlfiwz : PowerPC_QPX_Load_Intrinsic<"qvlfiwz">;
def int_ppc_qpx_qvlpcld : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcld">;
def int_ppc_qpx_qvlpcls : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcls">;
def int_ppc_qpx_qvlpcrd : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrd">;
def int_ppc_qpx_qvlpcrs : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrs">;
// Store instructions
def int_ppc_qpx_qvstfd : PowerPC_QPX_Store_Intrinsic<"qvstfd">;
def int_ppc_qpx_qvstfda : PowerPC_QPX_Store_Intrinsic<"qvstfda">;
def int_ppc_qpx_qvstfs : PowerPC_QPX_Store_Intrinsic<"qvstfs">;
def int_ppc_qpx_qvstfsa : PowerPC_QPX_Store_Intrinsic<"qvstfsa">;
def int_ppc_qpx_qvstfcda : PowerPC_QPX_Store_Intrinsic<"qvstfcda">;
def int_ppc_qpx_qvstfcd : PowerPC_QPX_Store_Intrinsic<"qvstfcd">;
def int_ppc_qpx_qvstfcsa : PowerPC_QPX_Store_Intrinsic<"qvstfcsa">;
def int_ppc_qpx_qvstfcs : PowerPC_QPX_Store_Intrinsic<"qvstfcs">;
def int_ppc_qpx_qvstfiwa : PowerPC_QPX_Store_Intrinsic<"qvstfiwa">;
def int_ppc_qpx_qvstfiw : PowerPC_QPX_Store_Intrinsic<"qvstfiw">;
// Logical and permutation formation
def int_ppc_qpx_qvflogical : PowerPC_QPX_Intrinsic<"qvflogical",
[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_ppc_qpx_qvgpci : PowerPC_QPX_Intrinsic<"qvgpci",
[llvm_v4f64_ty], [llvm_i32_ty], [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
// PowerPC HTM Intrinsic Definitions.

View File

@ -160,8 +160,6 @@ StringRef Triple::getVendorTypeName(VendorType Kind) {
case AMD: return "amd";
case Apple: return "apple";
case BGP: return "bgp";
case BGQ: return "bgq";
case CSR: return "csr";
case Freescale: return "fsl";
case IBM: return "ibm";
@ -187,7 +185,6 @@ StringRef Triple::getOSTypeName(OSType Kind) {
case AMDHSA: return "amdhsa";
case AMDPAL: return "amdpal";
case Ananas: return "ananas";
case CNK: return "cnk";
case CUDA: return "cuda";
case CloudABI: return "cloudabi";
case Contiki: return "contiki";
@ -470,8 +467,6 @@ static Triple::VendorType parseVendor(StringRef VendorName) {
.Case("apple", Triple::Apple)
.Case("pc", Triple::PC)
.Case("scei", Triple::SCEI)
.Case("bgp", Triple::BGP)
.Case("bgq", Triple::BGQ)
.Case("fsl", Triple::Freescale)
.Case("ibm", Triple::IBM)
.Case("img", Triple::ImaginationTechnologies)
@ -508,7 +503,6 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("minix", Triple::Minix)
.StartsWith("rtems", Triple::RTEMS)
.StartsWith("nacl", Triple::NaCl)
.StartsWith("cnk", Triple::CNK)
.StartsWith("aix", Triple::AIX)
.StartsWith("cuda", Triple::CUDA)
.StartsWith("nvcl", Triple::NVCL)

View File

@ -492,21 +492,6 @@ public:
Inst.addOperand(MCOperand::createReg(VSSRegs[getVSReg()]));
}
void addRegQFRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
}
void addRegQSRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
}
void addRegQBRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
}
void addRegSPE4RCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(RRegs[getReg()]));
@ -1207,9 +1192,6 @@ bool PPCAsmParser::MatchRegisterName(unsigned &RegNo, int64_t &IntVal) {
} else if (Name.startswith_lower("v") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = VRegs[IntVal];
} else if (Name.startswith_lower("q") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = QFRegs[IntVal];
} else if (Name.startswith_lower("cr") &&
!Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) {
RegNo = CRRegs[IntVal];

View File

@ -36,7 +36,6 @@ add_llvm_target(PowerPCCodeGen
PPCMacroFusion.cpp
PPCMIPeephole.cpp
PPCRegisterInfo.cpp
PPCQPXLoadSplat.cpp
PPCSubtarget.cpp
PPCTargetMachine.cpp
PPCTargetObjectFile.cpp

View File

@ -167,12 +167,6 @@ static DecodeStatus DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo,
#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass
static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
return decodeRegisterClass(Inst, RegNo, QFRegs);
}
static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
@ -401,14 +395,9 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// Read the instruction in the proper endianness.
uint64_t Inst = ReadFunc(Bytes.data());
if (STI.getFeatureBits()[PPC::FeatureQPX]) {
if (STI.getFeatureBits()[PPC::FeatureSPE]) {
DecodeStatus result =
decodeInstruction(DecoderTableQPX32, MI, Inst, Address, this, STI);
if (result != MCDisassembler::Fail)
return result;
} else if (STI.getFeatureBits()[PPC::FeatureSPE]) {
DecodeStatus result =
decodeInstruction(DecoderTableSPE32, MI, Inst, Address, this, STI);
decodeInstruction(DecoderTableSPE32, MI, Inst, Address, this, STI);
if (result != MCDisassembler::Fail)
return result;
}

View File

@ -49,18 +49,6 @@ FullRegNamesWithPercent("ppc-reg-with-percent-prefix", cl::Hidden,
void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
const char *RegName = getRegisterName(RegNo);
if (RegName[0] == 'q' /* QPX */) {
// The system toolchain on the BG/Q does not understand QPX register names
// in .cfi_* directives, so print the name of the floating-point
// subregister instead.
std::string RN(RegName);
RN[0] = 'f';
OS << RN;
return;
}
OS << RegName;
}

View File

@ -159,7 +159,6 @@ using llvm::MCPhysReg;
static const MCPhysReg SPERegs[32] = PPC_REGS0_31(PPC::S); \
static const MCPhysReg VFRegs[32] = PPC_REGS0_31(PPC::VF); \
static const MCPhysReg VRegs[32] = PPC_REGS0_31(PPC::V); \
static const MCPhysReg QFRegs[32] = PPC_REGS0_31(PPC::QF); \
static const MCPhysReg RRegsNoR0[32] = \
PPC_REGS_NO0_31(PPC::ZERO, PPC::R); \
static const MCPhysReg XRegsNoX0[32] = \

View File

@ -44,7 +44,6 @@ namespace llvm {
FunctionPass *createPPCMIPeepholePass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCBranchCoalescingPass();
FunctionPass *createPPCQPXLoadSplatPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM, CodeGenOpt::Level OL);
FunctionPass *createPPCTLSDynamicCallPass();
FunctionPass *createPPCBoolRetToIntPass();
@ -68,7 +67,6 @@ namespace llvm {
void initializePPCReduceCRLogicalsPass(PassRegistry&);
void initializePPCBSelPass(PassRegistry&);
void initializePPCBranchCoalescingPass(PassRegistry&);
void initializePPCQPXLoadSplatPass(PassRegistry&);
void initializePPCBoolRetToIntPass(PassRegistry&);
void initializePPCExpandISELPass(PassRegistry &);
void initializePPCPreEmitPeepholePass(PassRegistry &);

View File

@ -132,9 +132,6 @@ def FeaturePPC4xx : SubtargetFeature<"ppc4xx", "IsPPC4xx", "true",
"Enable PPC 4xx instructions">;
def FeaturePPC6xx : SubtargetFeature<"ppc6xx", "IsPPC6xx", "true",
"Enable PPC 6xx instructions">;
def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
"Enable QPX instructions",
[FeatureFPU]>;
def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
"Enable VSX instructions",
[FeatureAltivec]>;
@ -193,7 +190,7 @@ def FeatureFloat128 :
def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD",
"POPCNTD_Fast",
"Enable the popcnt[dw] instructions">;
// Note that for the a2/a2q processor models we should not use popcnt[dw] by
// Note that for the a2 processor models we should not use popcnt[dw] by
// default. These processors do support the instructions, but they're
// microcoded, and the software emulation is about twice as fast.
def FeatureSlowPOPCNTD : SubtargetFeature<"slow-popcntd","HasPOPCNTD",
@ -514,15 +511,6 @@ def : ProcessorModel<"a2", PPCA2Model,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX,
Feature64Bit /*, Feature64BitRegs */, FeatureMFTB]>;
def : ProcessorModel<"a2q", PPCA2Model,
[DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF,
FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX,
Feature64Bit /*, Feature64BitRegs */, FeatureQPX,
FeatureMFTB]>;
def : ProcessorModel<"pwr3", G5Model,
[DirectivePwr3, FeatureAltivec,
FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF,

View File

@ -549,9 +549,6 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
if (Subtarget->hasSPE()) {
if (PPC::F4RCRegClass.contains(Reg) ||
PPC::F8RCRegClass.contains(Reg) ||
PPC::QBRCRegClass.contains(Reg) ||
PPC::QFRCRegClass.contains(Reg) ||
PPC::QSRCRegClass.contains(Reg) ||
PPC::VFRCRegClass.contains(Reg) ||
PPC::VRRCRegClass.contains(Reg) ||
PPC::VSFRCRegClass.contains(Reg) ||

View File

@ -61,9 +61,6 @@ def RetCC_PPC_Cold : CallingConv<[
CCIfType<[f64], CCAssignToReg<[F1]>>,
CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToReg<[V2]>>>,
CCIfType<[v4f64, v4f32, v4i1],
CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1]>>>,
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
CCIfSubtarget<"hasAltivec()",
CCAssignToReg<[V2]>>>
@ -98,10 +95,6 @@ def RetCC_PPC : CallingConv<[
CCIfSubtarget<"hasP9Vector()",
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
// QPX vectors are returned in QF1 and QF2.
CCIfType<[v4f64, v4f32, v4i1],
CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
// Vector types returned as "direct" go into V2 .. V9; note that only the
// ELFv2 ABI fully utilizes all these registers.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
@ -158,8 +151,6 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
CCIfType<[f128],
CCIfSubtarget<"hasP9Vector()",
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
CCIfType<[v4f64, v4f32, v4i1],
CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
CCIfSubtarget<"hasAltivec()",
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>
@ -223,9 +214,6 @@ def CC_PPC32_SVR4_Common : CallingConv<[
CCIfType<[f32], CCIfSubtarget<"hasSPE()", CCAssignToStack<4, 4>>>,
CCIfType<[f64], CCIfSubtarget<"hasSPE()", CCAssignToStack<8, 8>>>,
// QPX vectors that are stored in double precision need 32-byte alignment.
CCIfType<[v4f64, v4i1], CCAssignToStack<32, 32>>,
// Vectors and float128 get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>>,
CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToStack<16, 16>>>
@ -243,10 +231,6 @@ def CC_PPC32_SVR4_VarArg : CallingConv<[
// put vector arguments in vector registers before putting them on the stack.
let Entry = 1 in
def CC_PPC32_SVR4 : CallingConv<[
// QPX vectors mirror the scalar FP convention.
CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()",
CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>,
// The first 12 Vector arguments are passed in AltiVec registers.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,

View File

@ -4142,7 +4142,7 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
// Altivec Vector compare instructions do not set any CR register by default and
// vector compare operations return the same type as the operands.
if (LHS.getValueType().isVector()) {
if (Subtarget->hasQPX() || Subtarget->hasSPE())
if (Subtarget->hasSPE())
return false;
EVT VecVT = LHS.getValueType();
@ -4813,8 +4813,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
switch (LoadedVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid PPC load type!");
case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
case MVT::f64: Opcode = PPC::LFDUX; break;
case MVT::f32: Opcode = PPC::LFSUX; break;
case MVT::i32: Opcode = PPC::LWZUX; break;
@ -5095,12 +5093,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SelectCCOp = PPC::SELECT_CC_F16;
else if (Subtarget->hasSPE())
SelectCCOp = PPC::SELECT_CC_SPE;
else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
SelectCCOp = PPC::SELECT_CC_QFRC;
else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
SelectCCOp = PPC::SELECT_CC_QSRC;
else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
SelectCCOp = PPC::SELECT_CC_QBRC;
else if (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)
SelectCCOp = PPC::SELECT_CC_VSRC;
@ -5856,9 +5848,6 @@ void PPCDAGToDAGISel::PeepholeCROps() {
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
case PPC::SELECT_QFRC:
case PPC::SELECT_QSRC:
case PPC::SELECT_QBRC:
case PPC::SELECT_SPE:
case PPC::SELECT_SPE4:
case PPC::SELECT_VRRC:
@ -6177,9 +6166,6 @@ void PPCDAGToDAGISel::PeepholeCROps() {
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
case PPC::SELECT_QFRC:
case PPC::SELECT_QSRC:
case PPC::SELECT_QBRC:
case PPC::SELECT_SPE:
case PPC::SELECT_SPE4:
case PPC::SELECT_VRRC:

File diff suppressed because it is too large Load Diff

View File

@ -427,22 +427,6 @@ namespace llvm {
/// => VABSDUW((XVNEGSP a), (XVNEGSP b))
VABSD,
/// QVFPERM = This corresponds to the QPX qvfperm instruction.
QVFPERM,
/// QVGPCI = This corresponds to the QPX qvgpci instruction.
QVGPCI,
/// QVALIGNI = This corresponds to the QPX qvaligni instruction.
QVALIGNI,
/// QVESPLATI = This corresponds to the QPX qvesplati instruction.
QVESPLATI,
/// QBFLT = Access the underlying QPX floating-point boolean
/// representation.
QBFLT,
/// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or
/// lower (IDX=1) half of v4f32 to v2f64.
FP_EXTEND_HALF,
@ -519,10 +503,6 @@ namespace llvm {
/// Store scalar integers from VSR.
ST_VSR_SCAL_INT,
/// QBRC, CHAIN = QVLFSb CHAIN, Ptr
/// The 4xf32 load used for v4i1 constants.
QVLFSb,
/// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
/// except they ensure that the compare input is zero-extended for
/// sub-word versions because the atomic loads zero-extend.

View File

@ -642,7 +642,6 @@ class XForm_17a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let FRA = 0;
}
// Used for QPX
class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
@ -1781,14 +1780,6 @@ class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
// Used for QPX
class AForm_4a<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
let FRA = 0;
let FRC = 0;
}
// 1.7.13 M-Form
class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@ -2099,49 +2090,6 @@ class VX_RD5_RSp5_PS1_XO9<bits<9> xo, dag OOL, dag IOL, string asmstr,
let Inst{23-31} = xo;
}
// Z23-Form (used by QPX)
class Z23Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<5> FRT;
bits<5> FRA;
bits<5> FRB;
bits<2> idx;
let Pattern = pattern;
bit RC = 0; // set by isRecordForm
let Inst{6-10} = FRT;
let Inst{11-15} = FRA;
let Inst{16-20} = FRB;
let Inst{21-22} = idx;
let Inst{23-30} = xo;
let Inst{31} = RC;
}
class Z23Form_2<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: Z23Form_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
let FRB = 0;
}
class Z23Form_3<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<5> FRT;
bits<12> idx;
let Pattern = pattern;
bit RC = 0; // set by isRecordForm
let Inst{6-10} = FRT;
let Inst{11-22} = idx;
let Inst{23-30} = xo;
let Inst{31} = RC;
}
class Z23Form_8<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {

View File

@ -259,16 +259,6 @@ bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
case PPC::XVMULDP:
case PPC::XVMULSP:
case PPC::XSMULSP:
// QPX Add:
case PPC::QVFADD:
case PPC::QVFADDS:
case PPC::QVFADDSs:
// QPX Multiply:
case PPC::QVFMUL:
case PPC::QVFMULS:
case PPC::QVFMULSs:
return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
Inst.getFlag(MachineInstr::MIFlag::FmNsz);
// Fixed point:
// Multiply:
case PPC::MULHD:
@ -300,9 +290,7 @@ static const uint16_t FMAOpIdxInfo[][5] = {
{PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2},
{PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2},
{PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1},
{PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1},
{PPC::QVFMADDSs, PPC::QVFADDSs, PPC::QVFMULSs, 3, 1},
{PPC::QVFMADD, PPC::QVFADD, PPC::QVFMUL, 3, 1}};
{PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1}};
// Check if an opcode is a FMA instruction. If it is, return the index in array
// FMAOpIdxInfo. Otherwise, return -1.
@ -666,7 +654,6 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case PPC::LI8:
case PPC::LIS:
case PPC::LIS8:
case PPC::QVGPCI:
case PPC::ADDIStocHA:
case PPC::ADDIStocHA8:
case PPC::ADDItocL:
@ -1343,12 +1330,6 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
PPC::VSSRCRegClass.contains(DestReg, SrcReg))
Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
else if (PPC::QFRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::QVFMR;
else if (PPC::QSRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::QVFMRs;
else if (PPC::QBRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::QVFMRb;
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR;
else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
@ -1393,12 +1374,6 @@ static unsigned getSpillIndex(const TargetRegisterClass *RC) {
OpcodeIndex = SOK_VectorFloat4Spill;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_VRSaveSpill;
} else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_QuadFloat8Spill;
} else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_QuadFloat4Spill;
} else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_QuadBitSpill;
} else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_SpillToVSR;
} else {

View File

@ -123,9 +123,6 @@ enum SpillOpcodeKey {
SOK_VectorFloat8Spill,
SOK_VectorFloat4Spill,
SOK_VRSaveSpill,
SOK_QuadFloat8Spill,
SOK_QuadFloat4Spill,
SOK_QuadBitSpill,
SOK_SpillToVSR,
SOK_SPESpill,
SOK_LastOpcodeSpill // This must be last on the enum.
@ -136,32 +133,28 @@ enum SpillOpcodeKey {
{ \
PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX, \
PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, PPC::QVLFDXb, \
PPC::SPILLTOVSR_LD, PPC::EVLDD \
PPC::RESTORE_VRSAVE, PPC::SPILLTOVSR_LD, PPC::EVLDD \
}
#define Pwr9LoadOpcodes \
{ \
PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \
PPC::DFLOADf32, PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, \
PPC::QVLFDXb, PPC::SPILLTOVSR_LD \
PPC::DFLOADf32, PPC::RESTORE_VRSAVE, PPC::SPILLTOVSR_LD \
}
#define Pwr8StoreOpcodes \
{ \
PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, PPC::SPILL_VRSAVE, \
PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, PPC::SPILLTOVSR_ST, \
PPC::EVSTDD \
PPC::SPILLTOVSR_ST, PPC::EVSTDD \
}
#define Pwr9StoreOpcodes \
{ \
PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \
PPC::SPILL_VRSAVE, PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, \
PPC::SPILLTOVSR_ST \
PPC::SPILL_VRSAVE, PPC::SPILLTOVSR_ST \
}
// Initialize arrays for load and store spill opcodes on supported subtargets.
@ -273,10 +266,10 @@ public:
}
static bool isSameClassPhysRegCopy(unsigned Opcode) {
unsigned CopyOpcodes[] =
{ PPC::OR, PPC::OR8, PPC::FMR, PPC::VOR, PPC::XXLOR, PPC::XXLORf,
PPC::XSCPSGNDP, PPC::MCRF, PPC::QVFMR, PPC::QVFMRs, PPC::QVFMRb,
PPC::CROR, PPC::EVOR, -1U };
unsigned CopyOpcodes[] = {PPC::OR, PPC::OR8, PPC::FMR,
PPC::VOR, PPC::XXLOR, PPC::XXLORf,
PPC::XSCPSGNDP, PPC::MCRF, PPC::CROR,
PPC::EVOR, -1U};
for (int i = 0; CopyOpcodes[i] != -1U; i++)
if (Opcode == CopyOpcodes[i])
return true;

View File

@ -203,16 +203,6 @@ def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>;
def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>;
def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;
def PPCqvgpci : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>;
def PPCqvaligni : SDNode<"PPCISD::QVALIGNI", SDT_PPCqvaligni, []>;
def PPCqvesplati : SDNode<"PPCISD::QVESPLATI", SDT_PPCqvesplati, []>;
def PPCqbflt : SDNode<"PPCISD::QBFLT", SDT_PPCqbflt, []>;
def PPCqvlfsb : SDNode<"PPCISD::QVLFSb", SDT_PPCqvlfsb,
[SDNPHasChain, SDNPMayLoad]>;
def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>;
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
@ -3461,7 +3451,6 @@ include "PPCInstrAltivec.td"
include "PPCInstrSPE.td"
include "PPCInstr64Bit.td"
include "PPCInstrVSX.td"
include "PPCInstrQPX.td"
include "PPCInstrHTM.td"
def crnot : OutPatFrag<(ops node:$in),

File diff suppressed because it is too large Load Diff

View File

@ -1,161 +0,0 @@
//===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The QPX vector registers overlay the scalar floating-point registers, and
// any scalar floating-point loads splat their value across all vector lanes.
// Thus, if we have a scalar load followed by a splat, we can remove the splat
// (i.e. replace the load with a load-and-splat pseudo instruction).
//
// This pass must run after anything that might do store-to-load forwarding.
//
//===----------------------------------------------------------------------===//
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#define DEBUG_TYPE "ppc-qpx-load-splat"
STATISTIC(NumSimplified, "Number of QPX load splats simplified");
namespace {
struct PPCQPXLoadSplat : public MachineFunctionPass {
static char ID;
PPCQPXLoadSplat() : MachineFunctionPass(ID) {
initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &Fn) override;
StringRef getPassName() const override {
return "PowerPC QPX Load Splat Simplification";
}
};
char PPCQPXLoadSplat::ID = 0;
}
INITIALIZE_PASS(PPCQPXLoadSplat, "ppc-qpx-load-splat",
"PowerPC QPX Load Splat Simplification",
false, false)
FunctionPass *llvm::createPPCQPXLoadSplatPass() {
return new PPCQPXLoadSplat();
}
bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
bool MadeChange = false;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
for (auto MFI = MF.begin(), MFIE = MF.end(); MFI != MFIE; ++MFI) {
MachineBasicBlock *MBB = &*MFI;
SmallVector<MachineInstr *, 4> Splats;
for (auto MBBI = MBB->rbegin(); MBBI != MBB->rend(); ++MBBI) {
MachineInstr *MI = &*MBBI;
if (MI->hasUnmodeledSideEffects() || MI->isCall()) {
Splats.clear();
continue;
}
// We're looking for a sequence like this:
// %f0 = LFD 0, killed %x3, implicit-def %qf0; mem:LD8[%a](tbaa=!2)
// %qf1 = QVESPLATI killed %qf0, 0, implicit %rm
for (auto SI = Splats.begin(); SI != Splats.end();) {
MachineInstr *SMI = *SI;
Register SplatReg = SMI->getOperand(0).getReg();
Register SrcReg = SMI->getOperand(1).getReg();
if (MI->modifiesRegister(SrcReg, TRI)) {
switch (MI->getOpcode()) {
default:
SI = Splats.erase(SI);
continue;
case PPC::LFS:
case PPC::LFD:
case PPC::LFSU:
case PPC::LFDU:
case PPC::LFSUX:
case PPC::LFDUX:
case PPC::LFSX:
case PPC::LFDX:
case PPC::LFIWAX:
case PPC::LFIWZX:
if (SplatReg != SrcReg) {
// We need to change the load to define the scalar subregister of
// the QPX splat source register.
unsigned SubRegIndex =
TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg());
Register SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);
// Substitute both the explicit defined register, and also the
// implicit def of the containing QPX register.
MI->getOperand(0).setReg(SplatSubReg);
MI->substituteRegister(SrcReg, SplatReg, 0, *TRI);
}
SI = Splats.erase(SI);
// If SMI is directly after MI, then MBBI's base iterator is
// pointing at SMI. Adjust MBBI around the call to erase SMI to
// avoid invalidating MBBI.
++MBBI;
SMI->eraseFromParent();
--MBBI;
++NumSimplified;
MadeChange = true;
continue;
}
}
// If this instruction defines the splat register, then we cannot move
// the previous definition above it. If it reads from the splat
// register, then it must already be alive from some previous
// definition, and if the splat register is different from the source
// register, then this definition must not be the load for which we're
// searching.
if (MI->modifiesRegister(SplatReg, TRI) ||
(SrcReg != SplatReg &&
MI->readsRegister(SplatReg, TRI))) {
SI = Splats.erase(SI);
continue;
}
++SI;
}
if (MI->getOpcode() != PPC::QVESPLATI &&
MI->getOpcode() != PPC::QVESPLATIs &&
MI->getOpcode() != PPC::QVESPLATIb)
continue;
if (MI->getOperand(2).getImm() != 0)
continue;
// If there are other uses of the scalar value after this, replacing
// those uses might be non-trivial.
if (!MI->getOperand(1).isKill())
continue;
Splats.push_back(MI);
}
}
return MadeChange;
}

View File

@ -404,9 +404,6 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
case PPC::F8RCRegClassID:
case PPC::F4RCRegClassID:
case PPC::QFRCRegClassID:
case PPC::QSRCRegClassID:
case PPC::QBRCRegClassID:
case PPC::VRRCRegClassID:
case PPC::VFRCRegClassID:
case PPC::VSLRCRegClassID:

View File

@ -153,7 +153,6 @@ public:
switch (RegName[0]) {
case 'r':
case 'f':
case 'q': // for QPX
case 'v':
if (RegName[1] == 's')
return RegName + 2;

View File

@ -54,13 +54,6 @@ class FPR<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
}
// QFPR - One of the 32 256-bit floating-point vector registers (used for QPX)
class QFPR<FPR SubReg, string n> : PPCReg<n> {
let HWEncoding = SubReg.HWEncoding;
let SubRegs = [SubReg];
let SubRegIndices = [sub_64];
}
// VF - One of the 32 64-bit floating-point subregisters of the vector
// registers (used by VSX).
class VF<bits<5> num, string n> : PPCReg<n> {
@ -132,12 +125,6 @@ foreach Index = 0-31 in {
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
}
// QPX Floating-point registers
foreach Index = 0-31 in {
def QF#Index : QFPR<!cast<FPR>("F"#Index), "q"#Index>,
DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
}
// Vector registers
foreach Index = 0-31 in {
def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
@ -343,16 +330,6 @@ def SPILLTOVSRRC : RegisterClass<"PPC", [i64, f64], 64, (add G8RC, (sub VSFRC,
// Register class for single precision scalars in VSX registers
def VSSRC : RegisterClass<"PPC", [f32], 32, (add VSFRC)>;
// For QPX
def QFRC : RegisterClass<"PPC", [v4f64], 256, (add (sequence "QF%u", 0, 13),
(sequence "QF%u", 31, 14))>;
def QSRC : RegisterClass<"PPC", [v4f32], 128, (add QFRC)>;
def QBRC : RegisterClass<"PPC", [v4i1], 256, (add QFRC)> {
// These are actually stored as floating-point values where a positive
// number is true and anything else (including NaN) is false.
let Size = 256;
}
def CRBITRC : RegisterClass<"PPC", [i1], 32,
(add CR2LT, CR2GT, CR2EQ, CR2UN,
CR3LT, CR3GT, CR3EQ, CR3UN,

View File

@ -40,12 +40,9 @@ def P9Model : SchedMachineModel {
let CompleteModel = 1;
// Do not support QPX (Quad Processing eXtension), SPE (Signal Processing
// Engine), prefixed instructions on Power 9, PC relative mem ops, or
// instructions introduced in ISA 3.1.
let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs, PCRelativeMemops,
IsISA3_1];
// Do not support SPE (Signal Processing Engine), prefixed instructions on
// Power 9, PC relative mem ops, or instructions introduced in ISA 3.1.
let UnsupportedFeatures = [HasSPE, PrefixInstrs, PCRelativeMemops, IsISA3_1];
}
let SchedModel = P9Model in {

View File

@ -35,10 +35,6 @@ using namespace llvm;
static cl::opt<bool> UseSubRegLiveness("ppc-track-subreg-liveness",
cl::desc("Enable subregister liveness tracking for PPC"), cl::Hidden);
static cl::opt<bool> QPXStackUnaligned("qpx-stack-unaligned",
cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"),
cl::Hidden);
static cl::opt<bool>
EnableMachinePipeliner("ppc-enable-pipeliner",
cl::desc("Enable Machine Pipeliner for PPC"),
@ -70,7 +66,6 @@ void PPCSubtarget::initializeEnvironment() {
HasAltivec = false;
HasSPE = false;
HasFPU = false;
HasQPX = false;
HasVSX = false;
NeedsTwoConstNR = false;
HasP8Vector = false;
@ -109,7 +104,6 @@ void PPCSubtarget::initializeEnvironment() {
HasInvariantFunctionDescriptors = false;
HasPartwordAtomics = false;
HasDirectMove = false;
IsQPXStackUnaligned = false;
HasHTM = false;
HasFloat128 = false;
HasFusion = false;
@ -158,7 +152,7 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (HasSPE && IsPPC64)
report_fatal_error( "SPE is only supported for 32-bit targets.\n", false);
if (HasSPE && (HasAltivec || HasQPX || HasVSX || HasFPU))
if (HasSPE && (HasAltivec || HasVSX || HasFPU))
report_fatal_error(
"SPE and traditional floating point cannot both be enabled.\n", false);
@ -166,10 +160,6 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (!HasSPE)
HasFPU = true;
// QPX requires a 32-byte aligned stack. Note that we need to do this if
// we're compiling for a BG/Q system regardless of whether or not QPX
// is enabled because external functions will assume this alignment.
IsQPXStackUnaligned = QPXStackUnaligned;
StackAlignment = getPlatformStackAlignment();
// Determine endianness.

View File

@ -97,7 +97,6 @@ protected:
bool HasAltivec;
bool HasFPU;
bool HasSPE;
bool HasQPX;
bool HasVSX;
bool NeedsTwoConstNR;
bool HasP8Vector;
@ -150,11 +149,6 @@ protected:
POPCNTDKind HasPOPCNTD;
/// When targeting QPX running a stock PPC64 Linux kernel where the stack
/// alignment has not been changed, we need to keep the 16-byte alignment
/// of the stack.
bool IsQPXStackUnaligned;
const PPCTargetMachine &TM;
PPCFrameLowering FrameLowering;
PPCInstrInfo InstrInfo;
@ -255,7 +249,6 @@ public:
bool hasAltivec() const { return HasAltivec; }
bool hasSPE() const { return HasSPE; }
bool hasFPU() const { return HasFPU; }
bool hasQPX() const { return HasQPX; }
bool hasVSX() const { return HasVSX; }
bool needsTwoConstNR() const { return NeedsTwoConstNR; }
bool hasP8Vector() const { return HasP8Vector; }
@ -291,11 +284,7 @@ public:
bool hasPartwordAtomics() const { return HasPartwordAtomics; }
bool hasDirectMove() const { return HasDirectMove; }
bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
Align getPlatformStackAlignment() const {
if ((hasQPX() || isBGQ()) && !isQPXStackUnaligned())
return Align(32);
return Align(16);
}
@ -325,9 +314,6 @@ public:
const Triple &getTargetTriple() const { return TargetTriple; }
/// isBGQ - True if this is a BG/Q platform.
bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }

View File

@ -63,10 +63,6 @@ static cl::
opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
cl::desc("Disable VSX Swap Removal for PPC"));
static cl::
opt<bool> DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden,
cl::desc("Disable QPX load splat simplification"));
static cl::
opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
cl::desc("Disable machine peepholes for PPC"));
@ -114,7 +110,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() {
initializePPCReduceCRLogicalsPass(PR);
initializePPCBSelPass(PR);
initializePPCBranchCoalescingPass(PR);
initializePPCQPXLoadSplatPass(PR);
initializePPCBoolRetToIntPass(PR);
initializePPCExpandISELPass(PR);
initializePPCPreEmitPeepholePass(PR);
@ -411,14 +406,9 @@ void PPCPassConfig::addIRPasses() {
// Lower generic MASSV routines to PowerPC subtarget-specific entries.
addPass(createPPCLowerMASSVEntriesPass());
// For the BG/Q (or if explicitly requested), add explicit data prefetch
// intrinsics.
bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
getOptLevel() != CodeGenOpt::None;
// If explicitly requested, add explicit data prefetch intrinsics.
if (EnablePrefetch.getNumOccurrences() > 0)
UsePrefetching = EnablePrefetch;
if (UsePrefetching)
addPass(createLoopDataPrefetchPass());
if (TM->getOptLevel() >= CodeGenOpt::Default && EnableGEPOpt) {
@ -515,15 +505,8 @@ void PPCPassConfig::addPreRegAlloc() {
}
void PPCPassConfig::addPreSched2() {
if (getOptLevel() != CodeGenOpt::None) {
if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID);
// This optimization must happen after anything that might do store-to-load
// forwarding. Here we're after RA (and, thus, when spills are inserted)
// but before post-RA scheduling.
if (!DisableQPXLoadSplat)
addPass(createPPCQPXLoadSplatPass());
}
}
void PPCPassConfig::addPreEmitPass() {

View File

@ -25,8 +25,7 @@ using namespace llvm;
static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
// This is currently only used for the data prefetch pass which is only enabled
// for BG/Q by default.
// This is currently only used for the data prefetch pass
static cl::opt<unsigned>
CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64),
cl::desc("The loop prefetch cache line size"));
@ -104,55 +103,6 @@ PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1));
}
case Intrinsic::ppc_qpx_qvlfs:
// Turn PPC QPX qvlfs -> load if the pointer is known aligned.
if (getOrEnforceKnownAlignment(
II.getArgOperand(0), Align(16), IC.getDataLayout(), &II,
&IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
Type *VTy =
VectorType::get(IC.Builder.getFloatTy(),
cast<VectorType>(II.getType())->getElementCount());
Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(0),
PointerType::getUnqual(VTy));
Value *Load = IC.Builder.CreateLoad(VTy, Ptr);
return new FPExtInst(Load, II.getType());
}
break;
case Intrinsic::ppc_qpx_qvlfd:
// Turn PPC QPX qvlfd -> load if the pointer is known aligned.
if (getOrEnforceKnownAlignment(
II.getArgOperand(0), Align(32), IC.getDataLayout(), &II,
&IC.getAssumptionCache(), &IC.getDominatorTree()) >= 32) {
Value *Ptr = IC.Builder.CreateBitCast(
II.getArgOperand(0), PointerType::getUnqual(II.getType()));
return new LoadInst(II.getType(), Ptr, "", false, Align(32));
}
break;
case Intrinsic::ppc_qpx_qvstfs:
// Turn PPC QPX qvstfs -> store if the pointer is known aligned.
if (getOrEnforceKnownAlignment(
II.getArgOperand(1), Align(16), IC.getDataLayout(), &II,
&IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
Type *VTy = VectorType::get(
IC.Builder.getFloatTy(),
cast<VectorType>(II.getArgOperand(0)->getType())->getElementCount());
Value *TOp = IC.Builder.CreateFPTrunc(II.getArgOperand(0), VTy);
Type *OpPtrTy = PointerType::getUnqual(VTy);
Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
return new StoreInst(TOp, Ptr, false, Align(16));
}
break;
case Intrinsic::ppc_qpx_qvstfd:
// Turn PPC QPX qvstfd -> store if the pointer is known aligned.
if (getOrEnforceKnownAlignment(
II.getArgOperand(1), Align(32), IC.getDataLayout(), &II,
&IC.getAssumptionCache(), &IC.getDominatorTree()) >= 32) {
Type *OpPtrTy = PointerType::getUnqual(II.getArgOperand(0)->getType());
Value *Ptr = IC.Builder.CreateBitCast(II.getArgOperand(1), OpPtrTy);
return new StoreInst(II.getArgOperand(0), Ptr, false, Align(32));
}
break;
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
// Note that ppc_altivec_vperm has a big-endian bias, so when creating
@ -736,10 +686,7 @@ bool PPCTTIImpl::useColdCCForColdCall(Function &F) {
}
bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
// On the A2, always unroll aggressively. For QPX unaligned loads, we depend
// on combining the loads generated for consecutive accesses, and failure to
// do so is particularly expensive. This makes it much more likely (compared
// to only using concatenation unrolling).
// On the A2, always unroll aggressively.
if (ST->getCPUDirective() == PPC::DIR_A2)
return true;
@ -799,7 +746,6 @@ const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const {
unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const {
if (Vector) {
if (ST->hasQPX()) return 256;
if (ST->hasAltivec()) return 128;
return 0;
}
@ -828,8 +774,6 @@ unsigned PPCTTIImpl::getCacheLineSize() const {
}
unsigned PPCTTIImpl::getPrefetchDistance() const {
// This seems like a reasonable default for the BG/Q (this pass is enabled, by
// default, only on the BG/Q).
return 300;
}
@ -918,7 +862,7 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
// PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
// PPC, for both Altivec/VSX, support cheap arbitrary permutations
// (at least in the sense that there need only be one non-loop-invariant
// instruction). We need one such shuffle instruction for each actual
// register (this is not true for arbitrary shuffles, but is true for the
@ -974,13 +918,6 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return Cost;
} else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
// Floating point scalars are already located in index #0.
if (Index == 0)
return 0;
return Cost;
} else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
if (ST->hasP9Altivec()) {
if (ISD == ISD::INSERT_VECTOR_ELT)
@ -1055,8 +992,6 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
bool IsVSXType = ST->hasVSX() &&
(LT.second == MVT::v2f64 || LT.second == MVT::v2i64);
bool IsQPXType = ST->hasQPX() &&
(LT.second == MVT::v4f64 || LT.second == MVT::v4f32);
// VSX has 32b/64b load instructions. Legalization can handle loading of
// 32b/64b to VSR correctly and cheaply. But BaseT::getMemoryOpCost and
@ -1079,8 +1014,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// for Altivec types using the VSX instructions, but that's more expensive
// than using the permutation-based load sequence. On the P8, that's no
// longer true.
if (Opcode == Instruction::Load &&
((!ST->hasP8Vector() && IsAltivecType) || IsQPXType) &&
if (Opcode == Instruction::Load && (!ST->hasP8Vector() && IsAltivecType) &&
*Alignment >= LT.second.getScalarType().getStoreSize())
return Cost + LT.first; // Add the cost of the permutations.
@ -1133,7 +1067,7 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(
getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
CostKind);
// PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
// PPC, for both Altivec/VSX, support cheap arbitrary permutations
// (at least in the sense that there need only be one non-loop-invariant
// instruction). For each result vector, we need one shuffle per incoming
// vector (except that the first shuffle can take two incoming vectors

View File

@ -4751,15 +4751,14 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
// For PowerPC, we need to deal with alignment of stack arguments -
// they are mostly aligned to 8 bytes, but vectors and i128 arrays
// are aligned to 16 bytes, byvals can be aligned to 8 or 16 bytes,
// and QPX vectors are aligned to 32 bytes. For that reason, we
// compute current offset from stack pointer (which is always properly
// aligned), and offset for the first vararg, then subtract them.
// For that reason, we compute current offset from stack pointer (which is
// always properly aligned), and offset for the first vararg, then subtract
// them.
unsigned VAArgBase;
Triple TargetTriple(F.getParent()->getTargetTriple());
// Parameter save area starts at 48 bytes from frame pointer for ABIv1,
// and 32 bytes for ABIv2. This is usually determined by target
// endianness, but in theory could be overridden by function attribute.
// For simplicity, we ignore it here (it'd only matter for QPX vectors).
if (TargetTriple.getArch() == Triple::ppc64)
VAArgBase = 48;
else

View File

@ -1,5 +1,5 @@
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-bgq-linux"
target triple = "powerpc64le-unknown-linux"
; RUN: opt < %s -basic-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
@X = external global [16000 x double], align 32

View File

@ -218,42 +218,6 @@ entry:
; CHECK: cost of 2 for instruction: %r = load <4 x double>, <4 x double>* %p, align 8
}
define <4 x float> @test_l_qv4float(<4 x float>* %p) #1 {
entry:
%r = load <4 x float>, <4 x float>* %p, align 4
ret <4 x float> %r
; CHECK-LABEL: test_l_qv4float
; CHECK: cost of 2 for instruction: %r = load <4 x float>, <4 x float>* %p, align 4
}
define <8 x float> @test_l_qv8float(<8 x float>* %p) #1 {
entry:
%r = load <8 x float>, <8 x float>* %p, align 4
ret <8 x float> %r
; CHECK-LABEL: test_l_qv8float
; CHECK: cost of 4 for instruction: %r = load <8 x float>, <8 x float>* %p, align 4
}
define <4 x double> @test_l_qv4double(<4 x double>* %p) #1 {
entry:
%r = load <4 x double>, <4 x double>* %p, align 8
ret <4 x double> %r
; CHECK-LABEL: test_l_qv4double
; CHECK: cost of 2 for instruction: %r = load <4 x double>, <4 x double>* %p, align 8
}
define <8 x double> @test_l_qv8double(<8 x double>* %p) #1 {
entry:
%r = load <8 x double>, <8 x double>* %p, align 8
ret <8 x double> %r
; CHECK-LABEL: test_l_qv8double
; CHECK: cost of 4 for instruction: %r = load <8 x double>, <8 x double>* %p, align 8
}
define void @test_s_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 {
entry:
store <16 x i8> %v, <16 x i8>* %p, align 1
@ -362,43 +326,6 @@ entry:
; CHECK: cost of 2 for instruction: store <4 x double> %v, <4 x double>* %p, align 8
}
define void @test_s_qv4float(<4 x float>* %p, <4 x float> %v) #1 {
entry:
store <4 x float> %v, <4 x float>* %p, align 4
ret void
; CHECK-LABEL: test_s_qv4float
; CHECK: cost of 7 for instruction: store <4 x float> %v, <4 x float>* %p, align 4
}
define void @test_s_qv8float(<8 x float>* %p, <8 x float> %v) #1 {
entry:
store <8 x float> %v, <8 x float>* %p, align 4
ret void
; CHECK-LABEL: test_s_qv8float
; CHECK: cost of 15 for instruction: store <8 x float> %v, <8 x float>* %p, align 4
}
define void @test_s_qv4double(<4 x double>* %p, <4 x double> %v) #1 {
entry:
store <4 x double> %v, <4 x double>* %p, align 8
ret void
; CHECK-LABEL: test_s_qv4double
; CHECK: cost of 7 for instruction: store <4 x double> %v, <4 x double>* %p, align 8
}
define void @test_s_qv8double(<8 x double>* %p, <8 x double> %v) #1 {
entry:
store <8 x double> %v, <8 x double>* %p, align 8
ret void
; CHECK-LABEL: test_s_qv8double
; CHECK: cost of 15 for instruction: store <8 x double> %v, <8 x double>* %p, align 8
}
attributes #0 = { nounwind "target-cpu"="pwr7" }
attributes #1 = { nounwind "target-cpu"="a2q" }
attributes #2 = { nounwind "target-cpu"="pwr8" }

View File

@ -1,4 +1,4 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-bgq-linux -enable-misched < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux -enable-misched < %s | FileCheck %s
;
; PR14315: misched should not move the physreg copy of %t below the calls.

View File

@ -55,7 +55,7 @@
; Function Attrs: nounwind
declare void @llvm.stackprotector(i8*, i8**) #1
attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind }
!llvm.module.flags = !{!0, !1}

View File

@ -30,7 +30,7 @@
; Function Attrs: nounwind
declare void @llvm.stackprotector(i8*, i8**) #1
attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind }
!llvm.ident = !{!0}

View File

@ -1,23 +0,0 @@
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc64-- -mcpu=a2 | FileCheck -check-prefix=CHECK-A2 %s
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc64-- -mcpu=a2q | FileCheck -check-prefix=CHECK-A2Q %s
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-bgq-linux -mcpu=a2 | FileCheck -check-prefix=CHECK-BGQ %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
declare i32 @bar(i8* %a) nounwind;
define i32 @foo() nounwind {
%p = alloca i8, i8 115
store i8 0, i8* %p
%r = call i32 @bar(i8* %p)
ret i32 %r
}
; Without QPX, the allocated stack frame is 240 bytes, but with QPX
; (because we require 32-byte alignment), it is 256 bytes.
; CHECK-A2: @foo
; CHECK-A2: stdu 1, -240(1)
; CHECK-A2Q: @foo
; CHECK-A2Q: stdu 1, -256(1)
; CHECK-BGQ: @foo
; CHECK-BGQ: stdu 1, -256(1)

View File

@ -1,10 +0,0 @@
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc64-- -mcpu=a2q | FileCheck %s
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc64-- -mcpu=a2 -mattr=+qpx | FileCheck %s
define void @foo() {
entry:
ret void
}
; CHECK: @foo

View File

@ -298,7 +298,7 @@ _ZN10SubProcess12SafeSyscalls5fcntlEiil.exit: ; preds = %_ZN10SubProcess12Sa
; Function Attrs: nounwind argmemonly
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #1
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind argmemonly }
attributes #2 = { nounwind }

View File

@ -1,6 +1,5 @@
; RUN: llc -verify-machineinstrs < %s -mcpu=a2 | FileCheck %s
; RUN: llc -verify-machineinstrs < %s -mcpu=a2 -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-bgq-linux"
define i32 @zytest(i32 %a) nounwind {
entry:

View File

@ -65,7 +65,7 @@ entry:
}
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind }

View File

@ -63,8 +63,8 @@
ret i64 %2
}
attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

View File

@ -187,7 +187,7 @@
ret i64 %cond
}
attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

View File

@ -983,10 +983,10 @@
ret i64 %xor
}
attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,-vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,-vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

View File

@ -1,5 +1,4 @@
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck %s
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs -mcpu=a2q < %s | FileCheck %s --check-prefix=QPX
declare float @fabsf(float)
@ -64,11 +63,6 @@ loop_exit:
; CHECK-NOT: xsmindp
; CHECK: blr
; QPX-LABEL: test1v:
; QPX: mtctr
; QPX-NOT: bl fminf
; QPX: blr
define void @test1a(float %f, float* %fp) {
entry:
br label %loop_body
@ -139,11 +133,6 @@ loop_exit:
; CHECK-NOT: xsmaxdp
; CHECK: blr
; QPX-LABEL: test2v:
; QPX: mtctr
; QPX-NOT: bl fmax
; QPX: blr
define void @test2a(float %f, float* %fp) {
entry:
br label %loop_body

View File

@ -1,5 +1,4 @@
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -mcpu=pwr8 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR8
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -mcpu=a2q | FileCheck %s --check-prefixes=CHECK,CHECK-A2Q
; Verify that we do NOT generate the mtctr instruction for loop trip counts < 4
; The latency of the mtctr is only justified if there are more than 4 comparisons that are removed as a result.
@ -86,11 +85,8 @@ for.body: ; preds = %entry, %for.body
}
; Function Attrs: norecurse nounwind
; On core a2q, IssueWidth is 1. On core pwr8, IssueWidth is 8.
; a2q should use mtctr, but pwr8 should not use mtctr.
define signext i32 @testTripCount2NonSmallLoop() {
; CHECK-LABEL: testTripCount2NonSmallLoop:
; CHECK-A2Q: mtctr
; CHECK-PWR8-NOT: mtctr
; CHECK: blr
@ -121,12 +117,9 @@ for.end: ; preds = %if.end
ret i32 %conv
}
; On core a2q, IssueWidth is 1. On core pwr8, IssueWidth is 8.
; a2q should use mtctr, but pwr8 should not use mtctr.
define signext i32 @testTripCount5() {
; CHECK-LABEL: testTripCount5:
; CHECK-PWR8-NOT: mtctr
; CHECK-A2Q: mtctr
entry:
%.prea = load i32, i32* @a, align 4

View File

@ -5,7 +5,7 @@
; that were both inputs to the inline asm and also early-clobber outputs).
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-bgq-linux"
target triple = "powerpc64le-unknown-linux"
%struct._IO_FILE.119.8249.32639.195239.200117.211499.218003.221255.222881.224507.226133.240767.244019.245645.248897.260279.271661.281417.283043.302555.304181.325319.326945.344713 = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker.118.8248.32638.195238.200116.211498.218002.221254.222880.224506.226132.240766.244018.245644.248896.260278.271660.281416.283042.302554.304180.325318.326944.344712*, %struct._IO_FILE.119.8249.32639.195239.200117.211499.218003.221255.222881.224507.226133.240767.244019.245645.248897.260279.271661.281417.283043.302555.304181.325319.326945.344713*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
%struct._IO_marker.118.8248.32638.195238.200116.211498.218002.221254.222880.224506.226132.240766.244018.245644.248896.260278.271660.281416.283042.302554.304180.325318.326944.344712 = type { %struct._IO_marker.118.8248.32638.195238.200116.211498.218002.221254.222880.224506.226132.240766.244018.245644.248896.260278.271660.281416.283042.302554.304180.325318.326944.344712*, %struct._IO_FILE.119.8249.32639.195239.200117.211499.218003.221255.222881.224507.226133.240767.244019.245645.248897.260279.271661.281417.283043.302555.304181.325319.326945.344713*, i32 }

View File

@ -1,6 +1,6 @@
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mcpu=pwr8 < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-bgq-linux"
target triple = "powerpc64-unknown-linux"
%"class.Foam::messageStream.6" = type <{ %"class.Foam::string.5", i32, i32, i32, [4 x i8] }>
%"class.Foam::string.5" = type { %"class.std::basic_string.4" }
@ -419,8 +419,8 @@ declare void @_ZN4Foam11regIOobjectD2Ev() #0
declare void @_ZN4Foam6reduceIiNS_5sumOpIiEEEEvRKNS_4ListINS_8UPstream11commsStructEEERT_RKT0_ii() #0
attributes #0 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="a2q" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { inlinehint "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="a2q" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #0 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { inlinehint "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0}

View File

@ -1,6 +1,6 @@
; RUN: llc -verify-machineinstrs -O0 -relocation-model=pic < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-bgq-linux"
target triple = "powerpc64le-unknown-linux"
%"class.std::__1::__tree_node.130.151" = type { %"class.std::__1::__tree_node_base.base.128.149", %"class.boost::serialization::extended_type_info.129.150"* }
%"class.std::__1::__tree_node_base.base.128.149" = type <{ %"class.std::__1::__tree_end_node.127.148", %"class.std::__1::__tree_node_base.126.147"*, %"class.std::__1::__tree_node_base.126.147"*, i8 }>

View File

@ -33,4 +33,4 @@ define float @f(float %xf) #0 {
ret float %25
}
attributes #0 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #0 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }

View File

@ -1,6 +1,5 @@
; RUN: llc -verify-machineinstrs -mcpu=a2 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mcpu=a2 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-bgq-linux"
define linkonce_odr double @test1(ppc_fp128 %input) {
entry:

View File

@ -1,6 +1,6 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-bgq-linux -mcpu=a2 < %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux -mcpu=a2 < %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-bgq-linux"
target triple = "powerpc64le-unknown-linux"
%"class.std::__1::__assoc_sub_state" = type { %"class.std::__1::__shared_count", %"class.std::__exception_ptr::exception_ptr", %"class.std::__1::mutex", %"class.std::__1::condition_variable", i32 }
%"class.std::__1::__shared_count" = type { i32 (...)**, i64 }

View File

@ -33,5 +33,5 @@ declare i8* @_ZN11__sanitizer21internal_start_threadEPFvPvES0_(void (i8*)*, i8*)
declare hidden void @_ZN11__sanitizer16BackgroundThreadEPv(i8* nocapture readnone) #5
attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #7 = { nobuiltin nounwind }

View File

@ -1,6 +1,6 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-bgq-linux -mcpu=a2 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux -mcpu=a2 < %s | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-bgq-linux"
target triple = "powerpc64le-unknown-linux"
%struct.BG_CoordinateMapping_t = type { [4 x i8] }

View File

@ -1,6 +1,5 @@
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-bgq-linux"
define void @_Z4testSt7complexIfE(float %v0, float %v1, i64* %ref.tmp, float* %_M_value.realp.i.i, float* %_M_value.imagp.i.i) {
entry:

View File

@ -1,6 +1,6 @@
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -enable-ppc-prefetching=true -verify-machineinstrs < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-bgq-linux"
target triple = "powerpc64le-unknown-linux"
; Function Attrs: nounwind
define void @foo(double* %x, double* nocapture readonly %y) #0 {

View File

@ -1,6 +1,6 @@
; RUN: llc -verify-machineinstrs -mcpu=a2 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -enable-ppc-prefetching=true -mcpu=a2 < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-bgq-linux"
target triple = "powerpc64le-unknown-linux"
; Function Attrs: nounwind
define void @foo(double* nocapture %a, double* nocapture readonly %b) #0 {

View File

@ -1,5 +1,4 @@
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-bgq-linux < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BGQ
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@ -21,7 +20,6 @@ for.body: ; preds = %for.body, %entry
; CHECK-LABEL: @foo
; CHECK-BGQ-DAG: dcbt 4, 5
; CHECK-DAG: lfdu [[REG1:[0-9]+]], 8({{[0-9]+}})
; CHECK-DAG: fadd [[REG2:[0-9]+]], [[REG1]], 0
; CHECK-DAG: stfdu [[REG2]], 8({{[0-9]+}})
@ -34,15 +32,13 @@ for.cond.cleanup6: ; preds = %for.body7
for.body7: ; preds = %for.body, %for.body7
%i3.017 = phi i32 [ %inc9, %for.body7 ], [ 0, %for.body ]
tail call void bitcast (void (...)* @bar to void ()*)() #2
tail call void bitcast (void (...)* @bar to void ()*)() #0
%inc9 = add nuw nsw i32 %i3.017, 1
%exitcond = icmp eq i32 %inc9, 1024
br i1 %exitcond, label %for.cond.cleanup6, label %for.body7
}
declare void @bar(...) #1
declare void @bar(...)
attributes #0 = { nounwind "target-cpu"="a2q" }
attributes #1 = { "target-cpu"="a2q" }
attributes #2 = { nounwind }
attributes #0 = { nounwind }

View File

@ -41,6 +41,6 @@ define void @aligned_slot() #0 {
; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
attributes #0 = { nounwind "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #0 = { nounwind "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind }
attributes #2 = { nounwind }

View File

@ -1,5 +1,4 @@
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr7 < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-PWR
; RUN: llc -verify-machineinstrs -O3 -mcpu=a2q < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-QPX
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 < %s | FileCheck %s -check-prefix=FIXPOINT
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@ -93,9 +92,6 @@ define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, floa
define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds1:
; CHECK: # %bb.0:
; CHECK-QPX: qvfadds [[REG0:[0-9]+]], 1, 2
; CHECK-QPX: qvfadds [[REG1:[0-9]+]], 3, 4
; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
@ -110,9 +106,6 @@ define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <
define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds2:
; CHECK: # %bb.0:
; CHECK-QPX: qvfadds [[REG0:[0-9]+]], 1, 2
; CHECK-QPX: qvfadds [[REG1:[0-9]+]], 3, 4
; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
@ -127,9 +120,6 @@ define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <
define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds3:
; CHECK: # %bb.0:
; CHECK-QPX: qvfadds [[REG0:[0-9]+]], 1, 2
; CHECK-QPX: qvfadds [[REG1:[0-9]+]], 3, 4
; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
@ -144,9 +134,6 @@ define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <
define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds4:
; CHECK: # %bb.0:
; CHECK-QPX: qvfadds [[REG0:[0-9]+]], 1, 2
; CHECK-QPX: qvfadds [[REG1:[0-9]+]], 3, 4
; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
@ -217,9 +204,6 @@ define i64 @reassociate_mulld(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
define double @reassociate_mamaa_double(double %0, double %1, double %2, double %3, double %4, double %5) {
; CHECK-LABEL: reassociate_mamaa_double:
; CHECK: # %bb.0:
; CHECK-QPX-DAG: fmadd [[REG0:[0-9]+]], 4, 3, 2
; CHECK-QPX-DAG: fmadd [[REG1:[0-9]+]], 6, 5, 1
; CHECK-QPX: fadd 1, [[REG0]], [[REG1]]
; CHECK-PWR-DAG: xsmaddadp 1, 6, 5
; CHECK-PWR-DAG: xsmaddadp 2, 4, 3
; CHECK-PWR: xsadddp 1, 2, 1
@ -250,9 +234,6 @@ define float @reassociate_mamaa_float(float %0, float %1, float %2, float %3, fl
define <4 x float> @reassociate_mamaa_vec(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, <4 x float> %5) {
; CHECK-LABEL: reassociate_mamaa_vec:
; CHECK: # %bb.0:
; CHECK-QPX-DAG: qvfmadds [[REG0:[0-9]+]], 4, 3, 2
; CHECK-QPX-DAG: qvfmadds [[REG1:[0-9]+]], 6, 5, 1
; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
; CHECK-PWR-DAG: xvmaddasp [[REG0:[0-9]+]], 39, 38
; CHECK-PWR-DAG: xvmaddasp [[REG1:[0-9]+]], 37, 36
; CHECK-PWR: xvaddsp 34, [[REG1]], [[REG0]]
@ -268,11 +249,6 @@ define <4 x float> @reassociate_mamaa_vec(<4 x float> %0, <4 x float> %1, <4 x f
define double @reassociate_mamama_double(double %0, double %1, double %2, double %3, double %4, double %5, double %6, double %7, double %8) {
; CHECK-LABEL: reassociate_mamama_double:
; CHECK: # %bb.0:
; CHECK-QPX: fmadd [[REG0:[0-9]+]], 2, 1, 7
; CHECK-QPX-DAG: fmul [[REG1:[0-9]+]], 4, 3
; CHECK-QPX-DAG: fmadd [[REG2:[0-9]+]], 6, 5, [[REG0]]
; CHECK-QPX-DAG: fmadd [[REG3:[0-9]+]], 9, 8, [[REG1]]
; CHECK-QPX: fadd 1, [[REG2]], [[REG3]]
; CHECK-PWR: xsmaddadp 7, 2, 1
; CHECK-PWR-DAG: xsmuldp [[REG0:[0-9]+]], 4, 3
; CHECK-PWR-DAG: xsmaddadp 7, 6, 5

View File

@ -19,7 +19,7 @@ entry:
declare void @bar(double) #1
attributes #0 = { nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-qpx,-vsx" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #1 = { "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-qpx,-vsx" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #0 = { nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #1 = { "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #2 = { nounwind }

View File

@ -1,9 +1,8 @@
; RUN: opt -ee-instrument < %s | opt -inline | llc | FileCheck %s
; RUN: opt -ee-instrument < %s | opt -inline | llc -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
; The run-line mimics how Clang might run the instrumentation passes.
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-bgq-linux"
define void @leaf_function() #0 {

View File

@ -1,6 +1,5 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck %s -check-prefix=PWR7
; RUN: llc -verify-machineinstrs -mcpu=pwr8 < %s | FileCheck %s -check-prefix=PWR8
; RUN: llc -verify-machineinstrs -mcpu=a2q < %s | FileCheck %s -check-prefix=A2Q
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@ -25,12 +24,6 @@ entry:
; PWR8: lxvw4x
; PWR8: stxvw4x
; PWR8: blr
; A2Q-LABEL: @foo1
; A2Q-NOT: bl memcpy
; A2Q: ld {{[0-9]+}}, {{[0-9]+}}(4)
; A2Q: std {{[0-9]+}}, {{[0-9]+}}(3)
; A2Q: blr
}
; Function Attrs: nounwind
@ -52,12 +45,6 @@ entry:
; PWR8: lxvw4x
; PWR8: stxvw4x
; PWR8: blr
; A2Q-LABEL: @foo2
; A2Q-NOT: bl memcpy
; A2Q: ld {{[0-9]+}}, {{[0-9]+}}(4)
; A2Q: std {{[0-9]+}}, {{[0-9]+}}(3)
; A2Q: blr
}
; Function Attrs: nounwind
@ -76,11 +63,6 @@ entry:
; PWR8-NOT: bl memset
; PWR8: stxvw4x
; PWR8: blr
; A2Q-LABEL: @bar1
; A2Q-NOT: bl memset
; A2Q: std {{[0-9]+}}, {{[0-9]+}}(3)
; A2Q: blr
}
; Function Attrs: nounwind
@ -99,11 +81,6 @@ entry:
; PWR8-NOT: bl memset
; PWR8: stxvw4x
; PWR8: blr
; A2Q-LABEL: @bar2
; A2Q-NOT: bl memset
; A2Q: qvstfdx
; A2Q: blr
}
; Function Attrs: nounwind

View File

@ -1,48 +0,0 @@
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -O0 < %s | FileCheck %s -check-prefix=CHECK-O0
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-bgq-linux"
; Function Attrs: nounwind
define void @test_qpx() unnamed_addr #0 align 2 {
entry:
%0 = load i32, i32* undef, align 4
%1 = trunc i32 %0 to i8
call void @llvm.memset.p0i8.i64(i8* align 32 null, i8 %1, i64 64, i1 false)
ret void
; CHECK-LABEL: @test_qpx
; CHECK: qvstfdx
; CHECK: qvstfdx
; CHECK: blr
; CHECK-O0-LABEL: @test_qpx
; CHECK-O0-NOT: qvstfdx
; CHECK-O0: blr
}
; Function Attrs: nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #1
; Function Attrs: nounwind
define void @test_vsx() unnamed_addr #2 align 2 {
entry:
%0 = load i32, i32* undef, align 4
%1 = trunc i32 %0 to i8
call void @llvm.memset.p0i8.i64(i8* null, i8 %1, i64 32, i1 false)
ret void
; CHECK-LABEL: @test_vsx
; CHECK: stxvw4x
; CHECK: stxvw4x
; CHECK: blr
; CHECK-O0-LABEL: @test_vsx
; CHECK-O0-NOT: stxvw4x
; CHECK-O0: blr
}
attributes #0 = { nounwind "target-cpu"="a2q" }
attributes #1 = { nounwind }
attributes #2 = { nounwind "target-cpu"="pwr7" }

View File

@ -1,8 +1,7 @@
; RUN: llc -verify-machineinstrs < %s -enable-misched -pre-RA-sched=source -scheditins=false \
; RUN: -disable-ifcvt-triangle-false -disable-post-ra | FileCheck %s
; RUN: -disable-ifcvt-triangle-false -disable-post-ra -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
;
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-bgq-linux"
; %val1 is a load live out of %entry. It should be hoisted
; above the add.

View File

@ -1,7 +1,6 @@
; RUN: llc < %s -enable-misched -verify-machineinstrs
; PR14302
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-bgq-linux"
@b = external global [16000 x double], align 32

View File

@ -1,6 +1,5 @@
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-bgq-linux"
declare zeroext i1 @ri1()
declare void @se1()

View File

@ -92,7 +92,7 @@ entry:
; Left the target features in this test because it is important that caller has
; -pcrelative-memops while callee has +pcrelative-memops
attributes #0 = { nounwind "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+pcrelative-memops,+power8-vector,+power9-vector,+vsx,-htm,-qpx,-spe" }
attributes #1 = { "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+pcrelative-memops,+power8-vector,+power9-vector,+vsx,-htm,-qpx,-spe" }
attributes #2 = { nounwind "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+power9-vector,+vsx,-htm,-pcrelative-memops,-qpx,-spe" }
attributes #0 = { nounwind "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+pcrelative-memops,+power8-vector,+power9-vector,+vsx,-htm,-spe" }
attributes #1 = { "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+pcrelative-memops,+power8-vector,+power9-vector,+vsx,-htm,-spe" }
attributes #2 = { nounwind "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+power9-vector,+vsx,-htm,-pcrelative-memops,-spe" }
attributes #3 = { nounwind }

View File

@ -1,8 +1,6 @@
; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mattr=+popcntd < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mattr=+slow-popcntd < %s | FileCheck %s --check-prefix=SLOWPC
; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mcpu=pwr7 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mcpu=a2q < %s | FileCheck %s --check-prefix=SLOWPC
; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mcpu=a2q -mattr=+popcntd < %s | FileCheck %s
define i64 @_cntb64(i64 %x) nounwind readnone {
%cnt = tail call i64 @llvm.ppc.popcntb(i64 %x)

View File

@ -105,14 +105,3 @@
; STOP-AFTER-BRANCH-COALESCING-NOT: "ppc-branch-coalescing" pass is not registered.
; STOP-AFTER-BRANCH-COALESCING: Branch Coalescing
; Test pass name: ppc-qpx-load-splat.
; RUN: llc -mtriple=powerpc64le-unknown-unknown < %s -debug-pass=Structure -stop-before=ppc-qpx-load-splat -o /dev/null 2>&1 | FileCheck %s -check-prefix=STOP-BEFORE-QPX-LOAD-SPLAT
; STOP-BEFORE-QPX-LOAD-SPLAT-NOT: -ppc-qpx-load-splat
; STOP-BEFORE-QPX-LOAD-SPLAT-NOT: "ppc-qpx-load-splat" pass is not registered.
; STOP-BEFORE-QPX-LOAD-SPLAT-NOT: PowerPC QPX Load Splat Simplification
; RUN: llc -mtriple=powerpc64le-unknown-unknown < %s -debug-pass=Structure -stop-after=ppc-qpx-load-splat -o /dev/null 2>&1 | FileCheck %s -check-prefix=STOP-AFTER-QPX-LOAD-SPLAT
; STOP-AFTER-QPX-LOAD-SPLAT: -ppc-qpx-load-splat
; STOP-AFTER-QPX-LOAD-SPLAT-NOT: "ppc-qpx-load-splat" pass is not registered.
; STOP-AFTER-QPX-LOAD-SPLAT: PowerPC QPX Load Splat Simplification

View File

@ -1,6 +1,6 @@
; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK-SCO
; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO-HASQPX
; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO-HASQPX
; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO
; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO
; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -code-model=small | FileCheck %s -check-prefix=SCM
; No combination of "powerpc64le-unknown-linux-gnu" + "CHECK-SCO", because
@ -117,23 +117,6 @@ define void @caller_local_sret_32(%S_32* %a) #1 {
attributes #0 = { noinline nounwind }
attributes #1 = { nounwind }
; vector <4 x i1> test
define void @callee_v4i1(i8 %a, <4 x i1> %b, <4 x i1> %c) { ret void }
define void @caller_v4i1_reorder(i8 %a, <4 x i1> %b, <4 x i1> %c) {
tail call void @callee_v4i1(i8 %a, <4 x i1> %c, <4 x i1> %b)
ret void
; <4 x i1> is 32 bytes aligned, if subtarget doesn't support qpx, then we can't
; place b, c to qpx register, so we can't do sco on caller_v4i1_reorder
; CHECK-SCO-LABEL: caller_v4i1_reorder:
; CHECK-SCO: bl callee_v4i1
; CHECK-SCO-HASQPX-LABEL: caller_v4i1_reorder:
; CHECK-SCO-HASQPX: b callee_v4i1
}
define void @f128_callee(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b) { ret void }
define void @f128_caller(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b) {
tail call void @f128_callee(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b)

View File

@ -47,8 +47,8 @@ declare double @pow(double, double) #0
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }
attributes #3 = { nounwind }

View File

@ -18,7 +18,7 @@ entry:
declare fastcc void @bar([2 x i64], [2 x i64]) unnamed_addr #1 align 2
attributes #0 = { argmemonly nounwind }
attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }
!llvm.ident = !{!0}

View File

@ -67,4 +67,4 @@ bb:
ret void
}
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" }

View File

@ -1,6 +1,6 @@
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-bgq-linux"
target triple = "powerpc64le-unknown-linux"
%t1 = type { %t2*, %t3* }
%t2 = type <{ %t3*, i32, [4 x i8] }>

View File

@ -1,33 +0,0 @@
; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-bgq-linux"
define void @s452(i32 %inp1) nounwind {
entry:
br label %for.body4
for.body4: ; preds = %for.body4, %entry
%conv.4 = sitofp i32 %inp1 to double
%conv.5 = sitofp i32 %inp1 to double
%mul.4.v.i0.1 = insertelement <2 x double> undef, double %conv.4, i32 0
%v = insertelement <2 x double> %mul.4.v.i0.1, double %conv.5, i32 1
%vv = fmul <2 x double> %v, %v
%add7.4 = fadd <2 x double> %vv, %vv
store <2 x double> %add7.4, <2 x double>* undef, align 16
br i1 undef, label %for.end, label %for.body4
for.end: ; preds = %for.body4
unreachable
; CHECK-LABEL: @s452
; CHECK: lfiwax [[REG1:[0-9]+]],
; CHECK: fcfid [[REG2:[0-9]+]], [[REG1]]
; FIXME: We could 'promote' this to a vector earlier and remove this splat.
; CHECK: qvesplati {{[0-9]+}}, [[REG2]], 0
; CHECK: qvfmul
; CHECK: qvfadd
; CHECK: qvesplati {{[0-9]+}},
; FIXME: We can use qvstfcdx here instead of two stores.
; CHECK: stfd
; CHECK: stfd
}

View File

@ -1,37 +0,0 @@
; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-bgq-linux"
define <4 x double> @foo(double %f1, double %f2, double %f3, double %f4) {
%v1 = insertelement <4 x double> undef, double %f1, i32 0
%v2 = insertelement <4 x double> %v1, double %f2, i32 1
%v3 = insertelement <4 x double> %v2, double %f3, i32 2
%v4 = insertelement <4 x double> %v3, double %f4, i32 3
ret <4 x double> %v4
; CHECK-LABEL: @foo
; CHECK: qvgpci [[REG1:[0-9]+]], 275
; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101
; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]]
; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]]
; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]]
; CHECK: blr
}
define <4 x float> @goo(float %f1, float %f2, float %f3, float %f4) {
%v1 = insertelement <4 x float> undef, float %f1, i32 0
%v2 = insertelement <4 x float> %v1, float %f2, i32 1
%v3 = insertelement <4 x float> %v2, float %f3, i32 2
%v4 = insertelement <4 x float> %v3, float %f4, i32 3
ret <4 x float> %v4
; CHECK-LABEL: @goo
; CHECK: qvgpci [[REG1:[0-9]+]], 275
; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101
; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]]
; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]]
; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]]
; CHECK: blr
}

View File

@ -1,22 +0,0 @@
; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
target triple = "powerpc64-bgq-linux"
declare <4 x double> @foo(<4 x double> %p)
define <4 x double> @bar(<4 x double> %p, <4 x double> %q) {
entry:
%v = call <4 x double> @foo(<4 x double> %p)
%w = call <4 x double> @foo(<4 x double> %q)
%x = fadd <4 x double> %v, %w
ret <4 x double> %x
; CHECK-LABEL: @bar
; CHECK: qvstfdx 2,
; CHECK: bl foo
; CHECK: qvstfdx 1,
; CHECK: qvlfdx 1,
; CHECK: bl foo
; CHECK: qvlfdx [[REG:[0-9]+]],
; CHECK: qvfadd 1, [[REG]], 1
}

View File

@ -1,80 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s
; Function Attrs: norecurse nounwind readonly
define <4 x double> @foo(double* nocapture readonly %a) #0 {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvdsx v2, 0, r3
; CHECK-NEXT: vmr v3, v2
; CHECK-NEXT: blr
entry:
%0 = load double, double* %a, align 8
%vecinit.i = insertelement <4 x double> undef, double %0, i32 0
%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
ret <4 x double> %shuffle.i
}
define <4 x double> @foox(double* nocapture readonly %a, i64 %idx) #0 {
; CHECK-LABEL: foox:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r4, r4, 3
; CHECK-NEXT: lxvdsx v2, r3, r4
; CHECK-NEXT: vmr v3, v2
; CHECK-NEXT: blr
entry:
%p = getelementptr double, double* %a, i64 %idx
%0 = load double, double* %p, align 8
%vecinit.i = insertelement <4 x double> undef, double %0, i32 0
%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
ret <4 x double> %shuffle.i
}
define <4 x double> @fooxu(double* nocapture readonly %a, i64 %idx, double** %pptr) #0 {
; CHECK-LABEL: fooxu:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r4, r4, 3
; CHECK-NEXT: add r6, r3, r4
; CHECK-NEXT: std r6, 0(r5)
; CHECK-NEXT: lxvdsx v2, r3, r4
; CHECK-NEXT: vmr v3, v2
; CHECK-NEXT: blr
entry:
%p = getelementptr double, double* %a, i64 %idx
%0 = load double, double* %p, align 8
%vecinit.i = insertelement <4 x double> undef, double %0, i32 0
%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
store double* %p, double** %pptr, align 8
ret <4 x double> %shuffle.i
}
define <4 x float> @foof(float* nocapture readonly %a) #0 {
; CHECK-LABEL: foof:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lfiwzx f0, 0, r3
; CHECK-NEXT: xxspltw v2, vs0, 1
; CHECK-NEXT: blr
entry:
%0 = load float, float* %a, align 4
%vecinit.i = insertelement <4 x float> undef, float %0, i32 0
%shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %shuffle.i
}
define <4 x float> @foofx(float* nocapture readonly %a, i64 %idx) #0 {
; CHECK-LABEL: foofx:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r4, r4, 2
; CHECK-NEXT: lfiwzx f0, r3, r4
; CHECK-NEXT: xxspltw v2, vs0, 1
; CHECK-NEXT: blr
entry:
%p = getelementptr float, float* %a, i64 %idx
%0 = load float, float* %p, align 4
%vecinit.i = insertelement <4 x float> undef, float %0, i32 0
%shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %shuffle.i
}

View File

@ -1,26 +0,0 @@
; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
target triple = "powerpc64-bgq-linux"
define <4 x double> @foo(<4 x double>* %p) {
entry:
%v = load <4 x double>, <4 x double>* %p, align 8
ret <4 x double> %v
}
; CHECK: @foo
; CHECK-DAG: li [[REG1:[0-9]+]], 31
; CHECK-DAG: qvlfdx [[REG4:[0-9]+]], 0, 3
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], 3, [[REG1]]
; CHECK-DAG: qvlpcldx [[REG3:[0-9]+]], 0, 3
; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]]
; CHECK: blr
define <4 x double> @bar(<4 x double>* %p) {
entry:
%v = load <4 x double>, <4 x double>* %p, align 32
ret <4 x double> %v
}
; CHECK: @bar
; CHECK: qvlfdx

View File

@ -1,79 +0,0 @@
; RUN: llc -verify-machineinstrs -stop-after=finalize-isel < %s -mcpu=a2q | FileCheck %s
target triple = "powerpc64-bgq-linux"
define <2 x double> @test_qvfmadd(<2 x double> %0, <2 x double> %1, <2 x double> %2) {
; CHECK: test_qvfmadd
; CHECK: QVFMADD %2, %1, %0, implicit $rm
;
%4 = fmul reassoc nsz <2 x double> %2, %1
%5 = fadd reassoc nsz <2 x double> %4, %0
ret <2 x double> %5
}
define <4 x float> @test_qvfmadds(<4 x float> %0, <4 x float> %1, <4 x float> %2) {
; CHECK: test_qvfmadds
; CHECK: QVFMADDSs %2, %1, %0, implicit $rm
;
%4 = fmul reassoc nsz <4 x float> %2, %1
%5 = fadd reassoc nsz <4 x float> %4, %0
ret <4 x float> %5
}
define <2 x double> @test_qvfnmadd(<2 x double> %0, <2 x double> %1, <2 x double> %2) {
; CHECK: test_qvfnmadd
; CHECK: QVFNMADD %2, %1, %0, implicit $rm
;
%4 = fmul reassoc nsz <2 x double> %2, %1
%5 = fadd reassoc nsz <2 x double> %4, %0
%6 = fneg reassoc nsz <2 x double> %5
ret <2 x double> %6
}
define <4 x float> @test_qvfnmadds(<4 x float> %0, <4 x float> %1, <4 x float> %2) {
; CHECK: test_qvfnmadds
; CHECK: QVFNMADDSs %2, %1, %0, implicit $rm
;
%4 = fmul reassoc nsz <4 x float> %2, %1
%5 = fadd reassoc nsz <4 x float> %4, %0
%6 = fneg reassoc nsz <4 x float> %5
ret <4 x float> %6
}
define <2 x double> @test_qvfmsub(<2 x double> %0, <2 x double> %1, <2 x double> %2) {
; CHECK: test_qvfmsub
; CHECK: QVFMSUB %2, %1, %0, implicit $rm
;
%4 = fmul reassoc nsz <2 x double> %2, %1
%5 = fsub reassoc nsz <2 x double> %4, %0
ret <2 x double> %5
}
define <4 x float> @test_qvfmsubs(<4 x float> %0, <4 x float> %1, <4 x float> %2) {
; CHECK: test_qvfmsubs
; CHECK: QVFMSUBSs %2, %1, %0, implicit $rm
;
%4 = fmul reassoc nsz <4 x float> %2, %1
%5 = fsub reassoc nsz <4 x float> %4, %0
ret <4 x float> %5
}
define <2 x double> @test_qvfnmsub(<2 x double> %0, <2 x double> %1, <2 x double> %2) {
; CHECK: test_qvfnmsub
; CHECK: QVFNMSUB %2, %1, %0, implicit $rm
;
%4 = fmul reassoc nsz <2 x double> %2, %1
%5 = fsub reassoc nsz <2 x double> %4, %0
%6 = fneg reassoc nsz <2 x double> %5
ret <2 x double> %6
}
define <4 x float> @test_qvfnmsubs(<4 x float> %0, <4 x float> %1, <4 x float> %2) {
; CHECK: test_qvfnmsubs
; CHECK: QVFNMSUBSs %2, %1, %0, implicit $rm
;
%4 = fmul reassoc nsz <4 x float> %2, %1
%5 = fsub reassoc nsz <4 x float> %4, %0
%6 = fneg reassoc nsz <4 x float> %5
ret <4 x float> %6
}

View File

@ -1,473 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
define <4 x double> @foo_fmf(<4 x double> %a, <4 x double> %b) nounwind {
; CHECK-LABEL: foo_fmf:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; CHECK-NEXT: qvfrsqrte 3, 2
; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l
; CHECK-NEXT: qvlfdx 0, 0, 3
; CHECK-NEXT: qvfmul 4, 3, 3
; CHECK-NEXT: qvfmsub 2, 2, 0, 2
; CHECK-NEXT: qvfnmsub 4, 2, 4, 0
; CHECK-NEXT: qvfmul 3, 3, 4
; CHECK-NEXT: qvfmul 4, 3, 3
; CHECK-NEXT: qvfnmsub 0, 2, 4, 0
; CHECK-NEXT: qvfmul 0, 3, 0
; CHECK-NEXT: qvfmul 1, 1, 0
; CHECK-NEXT: blr
entry:
%x = call ninf afn reassoc <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
%r = fdiv arcp reassoc <4 x double> %a, %x
ret <4 x double> %r
}
define <4 x double> @foo_safe(<4 x double> %a, <4 x double> %b) nounwind {
; CHECK-LABEL: foo_safe:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: qvesplati 5, 2, 3
; CHECK-NEXT: qvesplati 3, 2, 1
; CHECK-NEXT: qvesplati 4, 2, 2
; CHECK-NEXT: fsqrt 2, 2
; CHECK-NEXT: fsqrt 5, 5
; CHECK-NEXT: fsqrt 4, 4
; CHECK-NEXT: fsqrt 3, 3
; CHECK-NEXT: qvesplati 6, 1, 3
; CHECK-NEXT: qvgpci 0, 275
; CHECK-NEXT: fdiv 2, 1, 2
; CHECK-NEXT: fdiv 5, 6, 5
; CHECK-NEXT: qvesplati 6, 1, 2
; CHECK-NEXT: qvesplati 1, 1, 1
; CHECK-NEXT: fdiv 4, 6, 4
; CHECK-NEXT: fdiv 1, 1, 3
; CHECK-NEXT: qvfperm 3, 4, 5, 0
; CHECK-NEXT: qvfperm 0, 2, 1, 0
; CHECK-NEXT: qvgpci 1, 101
; CHECK-NEXT: qvfperm 1, 0, 3, 1
; CHECK-NEXT: blr
entry:
%x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
%r = fdiv <4 x double> %a, %x
ret <4 x double> %r
}
define <4 x double> @foof_fmf(<4 x double> %a, <4 x float> %b) nounwind {
; CHECK-LABEL: foof_fmf:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, .LCPI2_0@toc@ha
; CHECK-NEXT: qvfrsqrtes 3, 2
; CHECK-NEXT: addi 3, 3, .LCPI2_0@toc@l
; CHECK-NEXT: qvlfsx 0, 0, 3
; CHECK-NEXT: qvfmuls 4, 3, 3
; CHECK-NEXT: qvfmsubs 2, 2, 0, 2
; CHECK-NEXT: qvfnmsubs 0, 2, 4, 0
; CHECK-NEXT: qvfmuls 0, 3, 0
; CHECK-NEXT: qvfmul 1, 1, 0
; CHECK-NEXT: blr
entry:
%x = call afn ninf reassoc <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
%y = fpext <4 x float> %x to <4 x double>
%r = fdiv arcp reassoc nsz <4 x double> %a, %y
ret <4 x double> %r
}
define <4 x double> @foof_safe(<4 x double> %a, <4 x float> %b) nounwind {
; CHECK-LABEL: foof_safe:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: qvesplati 0, 2, 3
; CHECK-NEXT: qvesplati 3, 2, 2
; CHECK-NEXT: fsqrts 4, 2
; CHECK-NEXT: qvesplati 2, 2, 1
; CHECK-NEXT: fsqrts 0, 0
; CHECK-NEXT: fsqrts 3, 3
; CHECK-NEXT: fsqrts 2, 2
; CHECK-NEXT: qvgpci 5, 275
; CHECK-NEXT: qvgpci 6, 101
; CHECK-NEXT: qvfperm 0, 3, 0, 5
; CHECK-NEXT: qvesplati 3, 1, 2
; CHECK-NEXT: qvfperm 2, 4, 2, 5
; CHECK-NEXT: qvfperm 0, 2, 0, 6
; CHECK-NEXT: qvesplati 2, 1, 3
; CHECK-NEXT: qvesplati 4, 0, 3
; CHECK-NEXT: fdiv 2, 2, 4
; CHECK-NEXT: qvesplati 4, 0, 2
; CHECK-NEXT: fdiv 3, 3, 4
; CHECK-NEXT: qvesplati 4, 1, 1
; CHECK-NEXT: fdiv 1, 1, 0
; CHECK-NEXT: qvesplati 0, 0, 1
; CHECK-NEXT: fdiv 0, 4, 0
; CHECK-NEXT: qvfperm 2, 3, 2, 5
; CHECK-NEXT: qvfperm 0, 1, 0, 5
; CHECK-NEXT: qvfperm 1, 0, 2, 6
; CHECK-NEXT: blr
entry:
%x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
%y = fpext <4 x float> %x to <4 x double>
%r = fdiv <4 x double> %a, %y
ret <4 x double> %r
}
define <4 x float> @food_fmf(<4 x float> %a, <4 x double> %b) nounwind {
; CHECK-LABEL: food_fmf:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, .LCPI4_0@toc@ha
; CHECK-NEXT: qvfrsqrte 3, 2
; CHECK-NEXT: addi 3, 3, .LCPI4_0@toc@l
; CHECK-NEXT: qvlfdx 0, 0, 3
; CHECK-NEXT: qvfmul 4, 3, 3
; CHECK-NEXT: qvfmsub 2, 2, 0, 2
; CHECK-NEXT: qvfnmsub 4, 2, 4, 0
; CHECK-NEXT: qvfmul 3, 3, 4
; CHECK-NEXT: qvfmul 4, 3, 3
; CHECK-NEXT: qvfnmsub 0, 2, 4, 0
; CHECK-NEXT: qvfmul 0, 3, 0
; CHECK-NEXT: qvfrsp 0, 0
; CHECK-NEXT: qvfmuls 1, 1, 0
; CHECK-NEXT: blr
entry:
%x = call afn ninf reassoc <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
%y = fptrunc <4 x double> %x to <4 x float>
%r = fdiv arcp reassoc <4 x float> %a, %y
ret <4 x float> %r
}
define <4 x float> @food_safe(<4 x float> %a, <4 x double> %b) nounwind {
; CHECK-LABEL: food_safe:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: qvesplati 0, 2, 3
; CHECK-NEXT: qvesplati 3, 2, 2
; CHECK-NEXT: fsqrt 4, 2
; CHECK-NEXT: qvesplati 2, 2, 1
; CHECK-NEXT: fsqrt 0, 0
; CHECK-NEXT: fsqrt 3, 3
; CHECK-NEXT: fsqrt 2, 2
; CHECK-NEXT: qvgpci 5, 275
; CHECK-NEXT: qvgpci 6, 101
; CHECK-NEXT: qvfperm 0, 3, 0, 5
; CHECK-NEXT: qvesplati 3, 1, 2
; CHECK-NEXT: qvfperm 2, 4, 2, 5
; CHECK-NEXT: qvfperm 0, 2, 0, 6
; CHECK-NEXT: qvesplati 2, 1, 3
; CHECK-NEXT: qvfrsp 0, 0
; CHECK-NEXT: qvesplati 4, 0, 3
; CHECK-NEXT: fdivs 2, 2, 4
; CHECK-NEXT: qvesplati 4, 0, 2
; CHECK-NEXT: fdivs 3, 3, 4
; CHECK-NEXT: qvesplati 4, 1, 1
; CHECK-NEXT: fdivs 1, 1, 0
; CHECK-NEXT: qvesplati 0, 0, 1
; CHECK-NEXT: fdivs 0, 4, 0
; CHECK-NEXT: qvfperm 2, 3, 2, 5
; CHECK-NEXT: qvfperm 0, 1, 0, 5
; CHECK-NEXT: qvfperm 1, 0, 2, 6
; CHECK-NEXT: blr
entry:
%x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
%y = fptrunc <4 x double> %x to <4 x float>
%r = fdiv <4 x float> %a, %y
ret <4 x float> %r
}
define <4 x float> @goo_fmf(<4 x float> %a, <4 x float> %b) nounwind {
; CHECK-LABEL: goo_fmf:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, .LCPI6_0@toc@ha
; CHECK-NEXT: qvfrsqrtes 3, 2
; CHECK-NEXT: addi 3, 3, .LCPI6_0@toc@l
; CHECK-NEXT: qvlfsx 0, 0, 3
; CHECK-NEXT: qvfmuls 4, 3, 3
; CHECK-NEXT: qvfmsubs 2, 2, 0, 2
; CHECK-NEXT: qvfnmsubs 0, 2, 4, 0
; CHECK-NEXT: qvfmuls 0, 3, 0
; CHECK-NEXT: qvfmuls 1, 1, 0
; CHECK-NEXT: blr
entry:
%x = call afn ninf reassoc <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
%r = fdiv arcp reassoc nsz <4 x float> %a, %x
ret <4 x float> %r
}
define <4 x float> @goo_safe(<4 x float> %a, <4 x float> %b) nounwind {
; CHECK-LABEL: goo_safe:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: qvesplati 5, 2, 3
; CHECK-NEXT: qvesplati 3, 2, 1
; CHECK-NEXT: qvesplati 4, 2, 2
; CHECK-NEXT: fsqrts 2, 2
; CHECK-NEXT: fsqrts 5, 5
; CHECK-NEXT: fsqrts 4, 4
; CHECK-NEXT: fsqrts 3, 3
; CHECK-NEXT: qvesplati 6, 1, 3
; CHECK-NEXT: qvgpci 0, 275
; CHECK-NEXT: fdivs 2, 1, 2
; CHECK-NEXT: fdivs 5, 6, 5
; CHECK-NEXT: qvesplati 6, 1, 2
; CHECK-NEXT: qvesplati 1, 1, 1
; CHECK-NEXT: fdivs 4, 6, 4
; CHECK-NEXT: fdivs 1, 1, 3
; CHECK-NEXT: qvfperm 3, 4, 5, 0
; CHECK-NEXT: qvfperm 0, 2, 1, 0
; CHECK-NEXT: qvgpci 1, 101
; CHECK-NEXT: qvfperm 1, 0, 3, 1
; CHECK-NEXT: blr
entry:
%x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
%r = fdiv <4 x float> %a, %x
ret <4 x float> %r
}
define <4 x double> @foo2_fmf(<4 x double> %a, <4 x double> %b) nounwind {
; CHECK-LABEL: foo2_fmf:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, .LCPI8_0@toc@ha
; CHECK-NEXT: qvfre 3, 2
; CHECK-NEXT: addi 3, 3, .LCPI8_0@toc@l
; CHECK-NEXT: qvlfdx 0, 0, 3
; CHECK-NEXT: qvfmadd 0, 2, 3, 0
; CHECK-NEXT: qvfnmsub 0, 3, 0, 3
; CHECK-NEXT: qvfmul 3, 1, 0
; CHECK-NEXT: qvfnmsub 1, 2, 3, 1
; CHECK-NEXT: qvfmadd 1, 0, 1, 3
; CHECK-NEXT: blr
entry:
%r = fdiv arcp reassoc nsz ninf <4 x double> %a, %b
ret <4 x double> %r
}
define <4 x double> @foo2_safe(<4 x double> %a, <4 x double> %b) nounwind {
; CHECK-LABEL: foo2_safe:
; CHECK: # %bb.0:
; CHECK-NEXT: qvesplati 3, 2, 3
; CHECK-NEXT: qvesplati 4, 1, 3
; CHECK-NEXT: qvesplati 5, 2, 2
; CHECK-NEXT: qvgpci 0, 275
; CHECK-NEXT: fdiv 3, 4, 3
; CHECK-NEXT: qvesplati 4, 1, 2
; CHECK-NEXT: fdiv 4, 4, 5
; CHECK-NEXT: fdiv 5, 1, 2
; CHECK-NEXT: qvesplati 2, 2, 1
; CHECK-NEXT: qvesplati 1, 1, 1
; CHECK-NEXT: fdiv 1, 1, 2
; CHECK-NEXT: qvfperm 2, 4, 3, 0
; CHECK-NEXT: qvfperm 0, 5, 1, 0
; CHECK-NEXT: qvgpci 1, 101
; CHECK-NEXT: qvfperm 1, 0, 2, 1
; CHECK-NEXT: blr
%r = fdiv <4 x double> %a, %b
ret <4 x double> %r
}
define <4 x float> @goo2_fmf(<4 x float> %a, <4 x float> %b) nounwind {
; CHECK-LABEL: goo2_fmf:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: qvfres 0, 2
; CHECK-NEXT: qvfmuls 3, 1, 0
; CHECK-NEXT: qvfnmsubs 1, 2, 3, 1
; CHECK-NEXT: qvfmadds 1, 0, 1, 3
; CHECK-NEXT: blr
entry:
%r = fdiv arcp reassoc ninf <4 x float> %a, %b
ret <4 x float> %r
}
define <4 x float> @goo2_safe(<4 x float> %a, <4 x float> %b) nounwind {
; CHECK-LABEL: goo2_safe:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: qvesplati 3, 2, 3
; CHECK-NEXT: qvesplati 4, 1, 3
; CHECK-NEXT: qvesplati 5, 2, 2
; CHECK-NEXT: qvgpci 0, 275
; CHECK-NEXT: fdivs 3, 4, 3
; CHECK-NEXT: qvesplati 4, 1, 2
; CHECK-NEXT: fdivs 4, 4, 5
; CHECK-NEXT: fdivs 5, 1, 2
; CHECK-NEXT: qvesplati 2, 2, 1
; CHECK-NEXT: qvesplati 1, 1, 1
; CHECK-NEXT: fdivs 1, 1, 2
; CHECK-NEXT: qvfperm 2, 4, 3, 0
; CHECK-NEXT: qvfperm 0, 5, 1, 0
; CHECK-NEXT: qvgpci 1, 101
; CHECK-NEXT: qvfperm 1, 0, 2, 1
; CHECK-NEXT: blr
entry:
%r = fdiv <4 x float> %a, %b
ret <4 x float> %r
}
define <4 x double> @foo3_fmf_denorm_on(<4 x double> %a) #0 {
; CHECK-LABEL: foo3_fmf_denorm_on:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, .LCPI12_0@toc@ha
; CHECK-NEXT: qvfrsqrte 0, 1
; CHECK-NEXT: addi 3, 3, .LCPI12_0@toc@l
; CHECK-NEXT: qvlfdx 2, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI12_1@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI12_1@toc@l
; CHECK-NEXT: qvfmul 3, 0, 0
; CHECK-NEXT: qvfmsub 4, 1, 2, 1
; CHECK-NEXT: qvfnmsub 3, 4, 3, 2
; CHECK-NEXT: qvfmul 0, 0, 3
; CHECK-NEXT: qvfmul 3, 0, 0
; CHECK-NEXT: qvfnmsub 2, 4, 3, 2
; CHECK-NEXT: qvfmul 0, 0, 2
; CHECK-NEXT: qvlfdx 2, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI12_2@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI12_2@toc@l
; CHECK-NEXT: qvlfdx 3, 0, 3
; CHECK-NEXT: qvfmul 0, 0, 1
; CHECK-NEXT: qvfabs 1, 1
; CHECK-NEXT: qvfcmplt 1, 1, 2
; CHECK-NEXT: qvfsel 1, 1, 3, 0
; CHECK-NEXT: blr
entry:
%r = call reassoc ninf afn <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
ret <4 x double> %r
}
define <4 x double> @foo3_fmf_denorm_off(<4 x double> %a) #1 {
; CHECK-LABEL: foo3_fmf_denorm_off:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, .LCPI13_0@toc@ha
; CHECK-NEXT: qvfrsqrte 0, 1
; CHECK-NEXT: addi 3, 3, .LCPI13_0@toc@l
; CHECK-NEXT: qvlfdx 2, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI13_1@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI13_1@toc@l
; CHECK-NEXT: qvfmul 3, 0, 0
; CHECK-NEXT: qvfmsub 4, 1, 2, 1
; CHECK-NEXT: qvfnmsub 3, 4, 3, 2
; CHECK-NEXT: qvfmul 0, 0, 3
; CHECK-NEXT: qvfmul 3, 0, 0
; CHECK-NEXT: qvfnmsub 2, 4, 3, 2
; CHECK-NEXT: qvfmul 0, 0, 2
; CHECK-NEXT: qvlfdx 2, 0, 3
; CHECK-NEXT: qvfmul 0, 0, 1
; CHECK-NEXT: qvfcmpeq 1, 1, 2
; CHECK-NEXT: qvfsel 1, 1, 2, 0
; CHECK-NEXT: blr
entry:
%r = call afn reassoc ninf <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
ret <4 x double> %r
}
define <4 x double> @foo3_safe_denorm_on(<4 x double> %a) #0 {
; CHECK-LABEL: foo3_safe_denorm_on:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: qvesplati 2, 1, 3
; CHECK-NEXT: qvesplati 3, 1, 2
; CHECK-NEXT: fsqrt 4, 1
; CHECK-NEXT: qvesplati 1, 1, 1
; CHECK-NEXT: fsqrt 2, 2
; CHECK-NEXT: fsqrt 3, 3
; CHECK-NEXT: fsqrt 1, 1
; CHECK-NEXT: qvgpci 0, 275
; CHECK-NEXT: qvfperm 2, 3, 2, 0
; CHECK-NEXT: qvfperm 0, 4, 1, 0
; CHECK-NEXT: qvgpci 1, 101
; CHECK-NEXT: qvfperm 1, 0, 2, 1
; CHECK-NEXT: blr
entry:
%r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
ret <4 x double> %r
}
define <4 x double> @foo3_safe_denorm_off(<4 x double> %a) #1 {
; CHECK-LABEL: foo3_safe_denorm_off:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: qvesplati 2, 1, 3
; CHECK-NEXT: qvesplati 3, 1, 2
; CHECK-NEXT: fsqrt 4, 1
; CHECK-NEXT: qvesplati 1, 1, 1
; CHECK-NEXT: fsqrt 2, 2
; CHECK-NEXT: fsqrt 3, 3
; CHECK-NEXT: fsqrt 1, 1
; CHECK-NEXT: qvgpci 0, 275
; CHECK-NEXT: qvfperm 2, 3, 2, 0
; CHECK-NEXT: qvfperm 0, 4, 1, 0
; CHECK-NEXT: qvgpci 1, 101
; CHECK-NEXT: qvfperm 1, 0, 2, 1
; CHECK-NEXT: blr
entry:
%r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
ret <4 x double> %r
}
define <4 x float> @goo3_fmf_denorm_on(<4 x float> %a) #0 {
; CHECK-LABEL: goo3_fmf_denorm_on:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, .LCPI16_1@toc@ha
; CHECK-NEXT: qvfrsqrtes 2, 1
; CHECK-NEXT: addi 3, 3, .LCPI16_1@toc@l
; CHECK-NEXT: qvlfsx 0, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI16_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI16_0@toc@l
; CHECK-NEXT: qvfmuls 4, 2, 2
; CHECK-NEXT: qvfmsubs 3, 1, 0, 1
; CHECK-NEXT: qvfnmsubs 0, 3, 4, 0
; CHECK-NEXT: qvlfsx 3, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI16_2@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI16_2@toc@l
; CHECK-NEXT: qvlfsx 4, 0, 3
; CHECK-NEXT: qvfmuls 0, 2, 0
; CHECK-NEXT: qvfabs 2, 1
; CHECK-NEXT: qvfmuls 0, 0, 1
; CHECK-NEXT: qvfcmplt 1, 2, 3
; CHECK-NEXT: qvfsel 1, 1, 4, 0
; CHECK-NEXT: blr
entry:
%r = call reassoc afn ninf nsz <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
ret <4 x float> %r
}
define <4 x float> @goo3_fmf_denorm_off(<4 x float> %a) #1 {
; CHECK-LABEL: goo3_fmf_denorm_off:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, .LCPI17_1@toc@ha
; CHECK-NEXT: qvfrsqrtes 2, 1
; CHECK-NEXT: addi 3, 3, .LCPI17_1@toc@l
; CHECK-NEXT: qvlfsx 0, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI17_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI17_0@toc@l
; CHECK-NEXT: qvfmuls 4, 2, 2
; CHECK-NEXT: qvfmsubs 3, 1, 0, 1
; CHECK-NEXT: qvfnmsubs 0, 3, 4, 0
; CHECK-NEXT: qvlfsx 3, 0, 3
; CHECK-NEXT: qvfmuls 0, 2, 0
; CHECK-NEXT: qvfmuls 0, 0, 1
; CHECK-NEXT: qvfcmpeq 1, 1, 3
; CHECK-NEXT: qvfsel 1, 1, 3, 0
; CHECK-NEXT: blr
entry:
%r = call reassoc ninf afn nsz <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
ret <4 x float> %r
}
define <4 x float> @goo3_safe(<4 x float> %a) nounwind {
; CHECK-LABEL: goo3_safe:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: qvesplati 2, 1, 3
; CHECK-NEXT: qvesplati 3, 1, 2
; CHECK-NEXT: fsqrts 4, 1
; CHECK-NEXT: qvesplati 1, 1, 1
; CHECK-NEXT: fsqrts 2, 2
; CHECK-NEXT: fsqrts 3, 3
; CHECK-NEXT: fsqrts 1, 1
; CHECK-NEXT: qvgpci 0, 275
; CHECK-NEXT: qvfperm 2, 3, 2, 0
; CHECK-NEXT: qvfperm 0, 4, 1, 0
; CHECK-NEXT: qvgpci 1, 101
; CHECK-NEXT: qvfperm 1, 0, 2, 1
; CHECK-NEXT: blr
entry:
%r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
ret <4 x float> %r
}
attributes #0 = { nounwind "denormal-fp-math"="ieee,ieee" }
attributes #1 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }

View File

@ -1,109 +0,0 @@
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck %s
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
define <4 x float> @test1(<4 x float> %x) nounwind {
%call = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %x) nounwind readnone
ret <4 x float> %call
; CHECK: test1:
; CHECK: qvfrim 1, 1
; CHECK-FM: test1:
; CHECK-FM: qvfrim 1, 1
}
declare <4 x float> @llvm.floor.v4f32(<4 x float>) nounwind readnone
define <4 x double> @test2(<4 x double> %x) nounwind {
%call = tail call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
ret <4 x double> %call
; CHECK: test2:
; CHECK: qvfrim 1, 1
; CHECK-FM: test2:
; CHECK-FM: qvfrim 1, 1
}
declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
define <4 x float> @test3(<4 x float> %x) nounwind {
%call = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %x) nounwind readnone
ret <4 x float> %call
; CHECK: test3:
; CHECK-NOT: qvfrin
; CHECK-FM: test3:
; CHECK-FM-NOT: qvfrin
}
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readnone
define <4 x double> @test4(<4 x double> %x) nounwind {
%call = tail call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %x) nounwind readnone
ret <4 x double> %call
; CHECK: test4:
; CHECK-NOT: qvfrin
; CHECK-FM: test4:
; CHECK-FM-NOT: qvfrin
}
declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) nounwind readnone
define <4 x float> @test5(<4 x float> %x) nounwind {
%call = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone
ret <4 x float> %call
; CHECK: test5:
; CHECK: qvfrip 1, 1
; CHECK-FM: test5:
; CHECK-FM: qvfrip 1, 1
}
declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
define <4 x double> @test6(<4 x double> %x) nounwind {
%call = tail call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
ret <4 x double> %call
; CHECK: test6:
; CHECK: qvfrip 1, 1
; CHECK-FM: test6:
; CHECK-FM: qvfrip 1, 1
}
declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
define <4 x float> @test9(<4 x float> %x) nounwind {
%call = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone
ret <4 x float> %call
; CHECK: test9:
; CHECK: qvfriz 1, 1
; CHECK-FM: test9:
; CHECK-FM: qvfriz 1, 1
}
declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone
define <4 x double> @test10(<4 x double> %x) nounwind {
%call = tail call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone
ret <4 x double> %call
; CHECK: test10:
; CHECK: qvfriz 1, 1
; CHECK-FM: test10:
; CHECK-FM: qvfriz 1, 1
}
declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone

View File

@ -1,26 +0,0 @@
; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
target triple = "powerpc64-bgq-linux"
define <4 x float> @foo(<4 x float>* %p) {
entry:
%v = load <4 x float>, <4 x float>* %p, align 4
ret <4 x float> %v
}
; CHECK: @foo
; CHECK-DAG: li [[REG1:[0-9]+]], 15
; CHECK-DAG: qvlfsx [[REG4:[0-9]+]], 0, 3
; CHECK-DAG: qvlfsx [[REG2:[0-9]+]], 3, [[REG1]]
; CHECK-DAG: qvlpclsx [[REG3:[0-9]+]], 0, 3
; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]]
; CHECK: blr
define <4 x float> @bar(<4 x float>* %p) {
entry:
%v = load <4 x float>, <4 x float>* %p, align 16
ret <4 x float> %v
}
; CHECK: @bar
; CHECK: qvlfsx

View File

@ -1,143 +0,0 @@
; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
target triple = "powerpc64-bgq-linux"
@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16
define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x i1> %c) nounwind readnone {
entry:
%r = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
ret <4 x float> %r
; CHECK-LABEL: @test1
; CHECK: qvfsel 1, 3, 1, 2
; CHECK: blr
}
define <4 x float> @test2(<4 x float> %a, <4 x float> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone {
entry:
%v = insertelement <4 x i1> undef, i1 %c1, i32 0
%v2 = insertelement <4 x i1> %v, i1 %c2, i32 1
%v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2
%v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3
%r = select <4 x i1> %v4, <4 x float> %a, <4 x float> %b
ret <4 x float> %r
; CHECK-LABEL: @test2
; CHECK: stw
; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
; CHECK: qvfsel 1, [[REG4]], 1, 2
; CHECK: blr
}
define <4 x i1> @test3(<4 x i1> %a) nounwind readnone {
entry:
%v = and <4 x i1> %a, <i1 0, i1 undef, i1 1, i1 1>
ret <4 x i1> %v
; CHECK-LABEL: @test3
; CHECK: qvlfsx [[REG:[0-9]+]],
; qvflogical 1, 1, [[REG]], 1
; blr
}
define <4 x i1> @test4(<4 x i1> %a, <4 x i1>* %t) nounwind {
entry:
%q = load <4 x i1>, <4 x i1>* %t, align 16
%v = and <4 x i1> %a, %q
ret <4 x i1> %v
; CHECK-LABEL: @test4
; CHECK-DAG: lbz
; CHECK-DAG: qvlfdx [[REG1:[0-9]+]],
; CHECK-DAG: stw
; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]],
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]]
; CHECK: qvfand 1, 1, [[REG4]]
; CHECK: blr
}
define void @test5(<4 x i1> %a) nounwind {
entry:
store <4 x i1> %a, <4 x i1>* @R
ret void
; CHECK-LABEL: @test5
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK: lwz
; CHECK: stb
; CHECK: blr
}
define i1 @test6(<4 x i1> %a) nounwind {
entry:
%r = extractelement <4 x i1> %a, i32 2
ret i1 %r
; CHECK-LABEL: @test6
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK: lwz
; CHECK: blr
}
define i1 @test7(<4 x i1> %a) nounwind {
entry:
%r = extractelement <4 x i1> %a, i32 2
%s = extractelement <4 x i1> %a, i32 3
%q = and i1 %r, %s
ret i1 %q
; CHECK-LABEL: @test7
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK-DAG: lwz [[REG4:[0-9]+]],
; FIXME: We're storing the vector twice, and that's silly.
; CHECK-DAG: qvstfiwx [[REG3]],
; CHECK: lwz [[REG5:[0-9]+]],
; CHECK: and 3,
; CHECK: blr
}
define i1 @test8(<3 x i1> %a) nounwind {
entry:
%r = extractelement <3 x i1> %a, i32 2
ret i1 %r
; CHECK-LABEL: @test8
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK: lwz
; CHECK: blr
}
define <3 x float> @test9(<3 x float> %a, <3 x float> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone {
entry:
%v = insertelement <3 x i1> undef, i1 %c1, i32 0
%v2 = insertelement <3 x i1> %v, i1 %c2, i32 1
%v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2
%r = select <3 x i1> %v3, <3 x float> %a, <3 x float> %b
ret <3 x float> %r
; CHECK-LABEL: @test9
; CHECK: stw
; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
; CHECK: qvfsel 1, [[REG4]], 1, 2
; CHECK: blr
}

View File

@ -1,25 +0,0 @@
; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
target triple = "powerpc64-bgq-linux"
define void @foo(<4 x float> %v, <4 x float>* %p) {
entry:
store <4 x float> %v, <4 x float>* %p, align 4
ret void
}
; CHECK: @foo
; CHECK: stfs
; CHECK: stfs
; CHECK: stfs
; CHECK: stfs
; CHECK: blr
define void @bar(<4 x float> %v, <4 x float>* %p) {
entry:
store <4 x float> %v, <4 x float>* %p, align 16
ret void
}
; CHECK: @bar
; CHECK: qvstfsx

View File

@ -1,151 +0,0 @@
; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
target triple = "powerpc64-bgq-linux"
@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16
define <4 x double> @test1(<4 x double> %a, <4 x double> %b, <4 x i1> %c) nounwind readnone {
entry:
%r = select <4 x i1> %c, <4 x double> %a, <4 x double> %b
ret <4 x double> %r
; CHECK-LABEL: @test1
; CHECK: qvfsel 1, 3, 1, 2
; CHECK: blr
}
define <4 x double> @test2(<4 x double> %a, <4 x double> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone {
entry:
%v = insertelement <4 x i1> undef, i1 %c1, i32 0
%v2 = insertelement <4 x i1> %v, i1 %c2, i32 1
%v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2
%v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3
%r = select <4 x i1> %v4, <4 x double> %a, <4 x double> %b
ret <4 x double> %r
; CHECK-LABEL: @test2
; FIXME: This load/store sequence is unnecessary.
; CHECK-DAG: lbz
; CHECK-DAG: stw
; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
; CHECK: qvfsel 1, [[REG4]], 1, 2
; CHECK: blr
}
define <4 x i1> @test3(<4 x i1> %a) nounwind readnone {
entry:
%v = and <4 x i1> %a, <i1 0, i1 undef, i1 1, i1 1>
ret <4 x i1> %v
; CHECK-LABEL: @test3
; CHECK: qvlfsx [[REG:[0-9]+]],
; qvflogical 1, 1, [[REG]], 1
; blr
}
define <4 x i1> @test4(<4 x i1> %a, <4 x i1>* %t) nounwind {
entry:
%q = load <4 x i1>, <4 x i1>* %t, align 16
%v = and <4 x i1> %a, %q
ret <4 x i1> %v
; CHECK-LABEL: @test4
; CHECK-DAG: lbz
; CHECK-DAG: qvlfdx [[REG1:[0-9]+]],
; CHECK-DAG: stw
; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]],
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]]
; CHECK: qvfand 1, 1, [[REG4]]
; CHECK: blr
}
define void @test5(<4 x i1> %a) nounwind {
entry:
store <4 x i1> %a, <4 x i1>* @R
ret void
; CHECK-LABEL: @test5
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK: lwz
; CHECK: stb
; CHECK: blr
}
define i1 @test6(<4 x i1> %a) nounwind {
entry:
%r = extractelement <4 x i1> %a, i32 2
ret i1 %r
; CHECK-LABEL: @test6
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK: lwz
; CHECK: blr
}
define i1 @test7(<4 x i1> %a) nounwind {
entry:
%r = extractelement <4 x i1> %a, i32 2
%s = extractelement <4 x i1> %a, i32 3
%q = and i1 %r, %s
ret i1 %q
; CHECK-LABEL: @test7
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK-DAG: lwz [[REG4:[0-9]+]],
; FIXME: We're storing the vector twice, and that's silly.
; CHECK-DAG: qvstfiwx [[REG3]],
; CHECK-DAG: lwz [[REG5:[0-9]+]],
; CHECK: and 3,
; CHECK: blr
}
define i1 @test8(<3 x i1> %a) nounwind {
entry:
%r = extractelement <3 x i1> %a, i32 2
ret i1 %r
; CHECK-LABEL: @test8
; CHECK: qvlfdx [[REG1:[0-9]+]],
; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
; CHECK: qvstfiwx [[REG3]],
; CHECK: lwz
; CHECK: blr
}
define <3 x double> @test9(<3 x double> %a, <3 x double> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone {
entry:
%v = insertelement <3 x i1> undef, i1 %c1, i32 0
%v2 = insertelement <3 x i1> %v, i1 %c2, i32 1
%v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2
%r = select <3 x i1> %v3, <3 x double> %a, <3 x double> %b
ret <3 x double> %r
; CHECK-LABEL: @test9
; FIXME: This load/store sequence is unnecessary.
; CHECK-DAG: lbz
; CHECK-DAG: stw
; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
; CHECK: qvfsel 1, [[REG4]], 1, 2
; CHECK: blr
}

View File

@ -1,31 +0,0 @@
; RUN: llc -verify-machineinstrs -mcpu=a2q < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-bgq-linux"
; Function Attrs: nounwind
define void @gsl_sf_legendre_Pl_deriv_array(<4 x i32> %inp1, <4 x double> %inp2) #0 {
entry:
br label %vector.body198
vector.body198: ; preds = %vector.body198, %for.body46.lr.ph
%0 = icmp ne <4 x i32> %inp1, zeroinitializer
%1 = select <4 x i1> %0, <4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double> <double -5.000000e-01, double -5.000000e-01, double -5.000000e-01, double -5.000000e-01>
%2 = fmul <4 x double> %inp2, %1
%3 = fmul <4 x double> %inp2, %2
%4 = fmul <4 x double> %3, %inp2
store <4 x double> %4, <4 x double>* undef, align 8
br label %return
; CHECK-LABEL: @gsl_sf_legendre_Pl_deriv_array
; CHECK: qvlfiwzx
; CHECK: qvfcfidu
; CHECK: qvfcmpeq
; CHECK: qvfsel
; CHECK: qvfmul
return: ; preds = %if.else.i
ret void
}
attributes #0 = { nounwind }

View File

@ -1,25 +0,0 @@
; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
target triple = "powerpc64-bgq-linux"
define void @foo(<4 x double> %v, <4 x double>* %p) {
entry:
store <4 x double> %v, <4 x double>* %p, align 8
ret void
}
; CHECK: @foo
; CHECK: stfd
; CHECK: stfd
; CHECK: stfd
; CHECK: stfd
; CHECK: blr
define void @bar(<4 x double> %v, <4 x double>* %p) {
entry:
store <4 x double> %v, <4 x double>* %p, align 32
ret void
}
; CHECK: @bar
; CHECK: qvstfdx

View File

@ -1,217 +0,0 @@
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
; Function Attrs: nounwind
define void @foo(double* noalias nocapture %a, double* noalias nocapture readonly %b) #0 {
entry:
br label %vector.body
; CHECK-LABEL: @foo
; Make sure that the offset constants we use are all even (only the last should be odd).
; CHECK-DAG: li {{[0-9]+}}, 1056
; CHECK-DAG: li {{[0-9]+}}, 1088
; CHECK-DAG: li {{[0-9]+}}, 1152
; CHECK-DAG: li {{[0-9]+}}, 1216
; CHECK-DAG: li {{[0-9]+}}, 1280
; CHECK-DAG: li {{[0-9]+}}, 1344
; CHECK-DAG: li {{[0-9]+}}, 1408
; CHECK-DAG: li {{[0-9]+}}, 1472
; CHECK-DAG: li {{[0-9]+}}, 1536
; CHECK-DAG: li {{[0-9]+}}, 1600
; CHECK-DAG: li {{[0-9]+}}, 1568
; CHECK-DAG: li {{[0-9]+}}, 1664
; CHECK-DAG: li {{[0-9]+}}, 1632
; CHECK-DAG: li {{[0-9]+}}, 1728
; CHECK-DAG: li {{[0-9]+}}, 1696
; CHECK-DAG: li {{[0-9]+}}, 1792
; CHECK-DAG: li {{[0-9]+}}, 1760
; CHECK-DAG: li {{[0-9]+}}, 1856
; CHECK-DAG: li {{[0-9]+}}, 1824
; CHECK-DAG: li {{[0-9]+}}, 1920
; CHECK-DAG: li {{[0-9]+}}, 1888
; CHECK-DAG: li {{[0-9]+}}, 1984
; CHECK-DAG: li {{[0-9]+}}, 1952
; CHECK-DAG: li {{[0-9]+}}, 2016
; CHECK-DAG: li {{[0-9]+}}, 1024
; CHECK-DAG: li {{[0-9]+}}, 1120
; CHECK-DAG: li {{[0-9]+}}, 1184
; CHECK-DAG: li {{[0-9]+}}, 1248
; CHECK-DAG: li {{[0-9]+}}, 1312
; CHECK-DAG: li {{[0-9]+}}, 1376
; CHECK-DAG: li {{[0-9]+}}, 1440
; CHECK-DAG: li {{[0-9]+}}, 1504
; CHECK-DAG: li {{[0-9]+}}, 2047
; CHECK: blr
vector.body: ; preds = %vector.body, %entry
%index = phi i64 [ 0, %entry ], [ %index.next.15, %vector.body ]
%0 = shl i64 %index, 1
%1 = getelementptr inbounds double, double* %b, i64 %0
%2 = bitcast double* %1 to <8 x double>*
%wide.vec = load <8 x double>, <8 x double>* %2, align 8
%strided.vec = shufflevector <8 x double> %wide.vec, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%3 = fadd <4 x double> %strided.vec, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
%4 = getelementptr inbounds double, double* %a, i64 %index
%5 = bitcast double* %4 to <4 x double>*
store <4 x double> %3, <4 x double>* %5, align 8
%index.next = or i64 %index, 4
%6 = shl i64 %index.next, 1
%7 = getelementptr inbounds double, double* %b, i64 %6
%8 = bitcast double* %7 to <8 x double>*
%wide.vec.1 = load <8 x double>, <8 x double>* %8, align 8
%strided.vec.1 = shufflevector <8 x double> %wide.vec.1, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%9 = fadd <4 x double> %strided.vec.1, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
%10 = getelementptr inbounds double, double* %a, i64 %index.next
%11 = bitcast double* %10 to <4 x double>*
store <4 x double> %9, <4 x double>* %11, align 8
%index.next.1 = or i64 %index, 8
%12 = shl i64 %index.next.1, 1
%13 = getelementptr inbounds double, double* %b, i64 %12
%14 = bitcast double* %13 to <8 x double>*
%wide.vec.2 = load <8 x double>, <8 x double>* %14, align 8
%strided.vec.2 = shufflevector <8 x double> %wide.vec.2, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%15 = fadd <4 x double> %strided.vec.2, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
%16 = getelementptr inbounds double, double* %a, i64 %index.next.1
%17 = bitcast double* %16 to <4 x double>*
store <4 x double> %15, <4 x double>* %17, align 8
%index.next.2 = or i64 %index, 12
%18 = shl i64 %index.next.2, 1
%19 = getelementptr inbounds double, double* %b, i64 %18
%20 = bitcast double* %19 to <8 x double>*
%wide.vec.3 = load <8 x double>, <8 x double>* %20, align 8
%strided.vec.3 = shufflevector <8 x double> %wide.vec.3, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%21 = fadd <4 x double> %strided.vec.3, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
%22 = getelementptr inbounds double, double* %a, i64 %index.next.2
%23 = bitcast double* %22 to <4 x double>*
store <4 x double> %21, <4 x double>* %23, align 8
%index.next.3 = or i64 %index, 16
%24 = shl i64 %index.next.3, 1
%25 = getelementptr inbounds double, double* %b, i64 %24
%26 = bitcast double* %25 to <8 x double>*
%wide.vec.4 = load <8 x double>, <8 x double>* %26, align 8
%strided.vec.4 = shufflevector <8 x double> %wide.vec.4, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%27 = fadd <4 x double> %strided.vec.4, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
%28 = getelementptr inbounds double, double* %a, i64 %index.next.3
%29 = bitcast double* %28 to <4 x double>*
store <4 x double> %27, <4 x double>* %29, align 8
%index.next.4 = or i64 %index, 20
%30 = shl i64 %index.next.4, 1
%31 = getelementptr inbounds double, double* %b, i64 %30
%32 = bitcast double* %31 to <8 x double>*
%wide.vec.5 = load <8 x double>, <8 x double>* %32, align 8
%strided.vec.5 = shufflevector <8 x double> %wide.vec.5, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%33 = fadd <4 x double> %strided.vec.5, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
%34 = getelementptr inbounds double, double* %a, i64 %index.next.4
%35 = bitcast double* %34 to <4 x double>*
store <4 x double> %33, <4 x double>* %35, align 8
%index.next.5 = or i64 %index, 24
%36 = shl i64 %index.next.5, 1
%37 = getelementptr inbounds double, double* %b, i64 %36
%38 = bitcast double* %37 to <8 x double>*
%wide.vec.6 = load <8 x double>, <8 x double>* %38, align 8
%strided.vec.6 = shufflevector <8 x double> %wide.vec.6, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%39 = fadd <4 x double> %strided.vec.6, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
%40 = getelementptr inbounds double, double* %a, i64 %index.next.5
%41 = bitcast double* %40 to <4 x double>*
store <4 x double> %39, <4 x double>* %41, align 8
%index.next.6 = or i64 %index, 28
%42 = shl i64 %index.next.6, 1
%43 = getelementptr inbounds double, double* %b, i64 %42
%44 = bitcast double* %43 to <8 x double>*
%wide.vec.7 = load <8 x double>, <8 x double>* %44, align 8
%strided.vec.7 = shufflevector <8 x double> %wide.vec.7, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%45 = fadd <4 x double> %strided.vec.7, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
%46 = getelementptr inbounds double, double* %a, i64 %index.next.6
%47 = bitcast double* %46 to <4 x double>*
store <4 x double> %45, <4 x double>* %47, align 8
%index.next.7 = or i64 %index, 32
%48 = shl i64 %index.next.7, 1
%49 = getelementptr inbounds double, double* %b, i64 %48
%50 = bitcast double* %49 to <8 x double>*
%wide.vec.8 = load <8 x double>, <8 x double>* %50, align 8
%strided.vec.8 = shufflevector <8 x double> %wide.vec.8, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%51 = fadd <4 x double> %strided.vec.8, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
%52 = getelementptr inbounds double, double* %a, i64 %index.next.7
%53 = bitcast double* %52 to <4 x double>*
store <4 x double> %51, <4 x double>* %53, align 8
%index.next.8 = or i64 %index, 36
%54 = shl i64 %index.next.8, 1
%55 = getelementptr inbounds double, double* %b, i64 %54
%56 = bitcast double* %55 to <8 x double>*
%wide.vec.9 = load <8 x double>, <8 x double>* %56, align 8
%strided.vec.9 = shufflevector <8 x double> %wide.vec.9, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%57 = fadd <4 x double> %strided.vec.9, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
%58 = getelementptr inbounds double, double* %a, i64 %index.next.8
%59 = bitcast double* %58 to <4 x double>*
store <4 x double> %57, <4 x double>* %59, align 8
%index.next.9 = or i64 %index, 40
%60 = shl i64 %index.next.9, 1
%61 = getelementptr inbounds double, double* %b, i64 %60
%62 = bitcast double* %61 to <8 x double>*
%wide.vec.10 = load <8 x double>, <8 x double>* %62, align 8
%strided.vec.10 = shufflevector <8 x double> %wide.vec.10, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%63 = fadd <4 x double> %strided.vec.10, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
%64 = getelementptr inbounds double, double* %a, i64 %index.next.9
%65 = bitcast double* %64 to <4 x double>*
store <4 x double> %63, <4 x double>* %65, align 8
%index.next.10 = or i64 %index, 44
%66 = shl i64 %index.next.10, 1
%67 = getelementptr inbounds double, double* %b, i64 %66
%68 = bitcast double* %67 to <8 x double>*
%wide.vec.11 = load <8 x double>, <8 x double>* %68, align 8
%strided.vec.11 = shufflevector <8 x double> %wide.vec.11, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%69 = fadd <4 x double> %strided.vec.11, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
%70 = getelementptr inbounds double, double* %a, i64 %index.next.10
%71 = bitcast double* %70 to <4 x double>*
store <4 x double> %69, <4 x double>* %71, align 8
%index.next.11 = or i64 %index, 48
%72 = shl i64 %index.next.11, 1
%73 = getelementptr inbounds double, double* %b, i64 %72
%74 = bitcast double* %73 to <8 x double>*
%wide.vec.12 = load <8 x double>, <8 x double>* %74, align 8
%strided.vec.12 = shufflevector <8 x double> %wide.vec.12, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%75 = fadd <4 x double> %strided.vec.12, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
%76 = getelementptr inbounds double, double* %a, i64 %index.next.11
%77 = bitcast double* %76 to <4 x double>*
store <4 x double> %75, <4 x double>* %77, align 8
%index.next.12 = or i64 %index, 52
%78 = shl i64 %index.next.12, 1
%79 = getelementptr inbounds double, double* %b, i64 %78
%80 = bitcast double* %79 to <8 x double>*
%wide.vec.13 = load <8 x double>, <8 x double>* %80, align 8
%strided.vec.13 = shufflevector <8 x double> %wide.vec.13, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%81 = fadd <4 x double> %strided.vec.13, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
%82 = getelementptr inbounds double, double* %a, i64 %index.next.12
%83 = bitcast double* %82 to <4 x double>*
store <4 x double> %81, <4 x double>* %83, align 8
%index.next.13 = or i64 %index, 56
%84 = shl i64 %index.next.13, 1
%85 = getelementptr inbounds double, double* %b, i64 %84
%86 = bitcast double* %85 to <8 x double>*
%wide.vec.14 = load <8 x double>, <8 x double>* %86, align 8
%strided.vec.14 = shufflevector <8 x double> %wide.vec.14, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%87 = fadd <4 x double> %strided.vec.14, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
%88 = getelementptr inbounds double, double* %a, i64 %index.next.13
%89 = bitcast double* %88 to <4 x double>*
store <4 x double> %87, <4 x double>* %89, align 8
%index.next.14 = or i64 %index, 60
%90 = shl i64 %index.next.14, 1
%91 = getelementptr inbounds double, double* %b, i64 %90
%92 = bitcast double* %91 to <8 x double>*
%wide.vec.15 = load <8 x double>, <8 x double>* %92, align 8
%strided.vec.15 = shufflevector <8 x double> %wide.vec.15, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%93 = fadd <4 x double> %strided.vec.15, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
%94 = getelementptr inbounds double, double* %a, i64 %index.next.14
%95 = bitcast double* %94 to <4 x double>*
store <4 x double> %93, <4 x double>* %95, align 8
%index.next.15 = add nsw i64 %index, 64
%96 = icmp eq i64 %index.next.15, 1600
br i1 %96, label %for.cond.cleanup, label %vector.body
for.cond.cleanup: ; preds = %vector.body
ret void
}
attributes #0 = { nounwind "target-cpu"="a2q" }

View File

@ -1,64 +0,0 @@
; RUN: llc -verify-machineinstrs < %s -mcpu=a2q | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-bgq-linux"
define <4 x double> @foo(<4 x double>* %a) {
entry:
%r = load <4 x double>, <4 x double>* %a, align 32
ret <4 x double> %r
; CHECK: qvlfdx
; CHECK: blr
}
define <4 x double> @bar(<4 x double>* %a) {
entry:
%r = load <4 x double>, <4 x double>* %a, align 8
%b = getelementptr <4 x double>, <4 x double>* %a, i32 16
%s = load <4 x double>, <4 x double>* %b, align 32
%t = fadd <4 x double> %r, %s
ret <4 x double> %t
; CHECK: qvlpcldx
; CHECK: qvlfdx
; CHECK: qvfperm
; CHECK: blr
}
define <4 x double> @bar1(<4 x double>* %a) {
entry:
%r = load <4 x double>, <4 x double>* %a, align 8
%b = getelementptr <4 x double>, <4 x double>* %a, i32 16
%s = load <4 x double>, <4 x double>* %b, align 8
%t = fadd <4 x double> %r, %s
ret <4 x double> %t
}
define <4 x double> @bar2(<4 x double>* %a) {
entry:
%r = load <4 x double>, <4 x double>* %a, align 8
%b = getelementptr <4 x double>, <4 x double>* %a, i32 1
%s = load <4 x double>, <4 x double>* %b, align 32
%t = fadd <4 x double> %r, %s
ret <4 x double> %t
}
define <4 x double> @bar3(<4 x double>* %a) {
entry:
%r = load <4 x double>, <4 x double>* %a, align 8
%b = getelementptr <4 x double>, <4 x double>* %a, i32 1
%s = load <4 x double>, <4 x double>* %b, align 8
%t = fadd <4 x double> %r, %s
ret <4 x double> %t
}
define <4 x double> @bar4(<4 x double>* %a) {
entry:
%r = load <4 x double>, <4 x double>* %a, align 8
%b = getelementptr <4 x double>, <4 x double>* %a, i32 1
%s = load <4 x double>, <4 x double>* %b, align 8
%c = getelementptr <4 x double>, <4 x double>* %b, i32 1
%t = load <4 x double>, <4 x double>* %c, align 8
%u = fadd <4 x double> %r, %s
%v = fadd <4 x double> %u, %t
ret <4 x double> %v
}

View File

@ -1,6 +1,4 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-crbits < %s | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-bgq-linux"
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-crbits -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
define void @test() align 2 {
entry:

Some files were not shown because too many files have changed in this diff Show More