forked from OSchip/llvm-project
Revert "[PowerPC] Fix EmitPPCBuiltinExpr to emit arguments once"
This reverts commit 2aae5b1fac because it
breaks tests on Windows.
This commit is contained in:
parent
be01af4a0f
commit
fef56f79ac
|
@ -15281,6 +15281,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
|
|||
|
||||
Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
||||
const CallExpr *E) {
|
||||
SmallVector<Value*, 4> Ops;
|
||||
|
||||
for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
|
||||
if (E->getArg(i)->getType()->isArrayType())
|
||||
Ops.push_back(EmitArrayToPointerDecay(E->getArg(i)).getPointer());
|
||||
else
|
||||
Ops.push_back(EmitScalarExpr(E->getArg(i)));
|
||||
}
|
||||
|
||||
Intrinsic::ID ID = Intrinsic::not_intrinsic;
|
||||
|
||||
|
@ -15307,9 +15315,6 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
case PPC::BI__builtin_vsx_lxvl:
|
||||
case PPC::BI__builtin_vsx_lxvll:
|
||||
{
|
||||
SmallVector<Value *, 2> Ops;
|
||||
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
||||
Ops.push_back(EmitScalarExpr(E->getArg(1)));
|
||||
if(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
|
||||
BuiltinID == PPC::BI__builtin_vsx_lxvll){
|
||||
Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
|
||||
|
@ -15378,10 +15383,6 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
case PPC::BI__builtin_vsx_stxvl:
|
||||
case PPC::BI__builtin_vsx_stxvll:
|
||||
{
|
||||
SmallVector<Value *, 3> Ops;
|
||||
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
||||
Ops.push_back(EmitScalarExpr(E->getArg(1)));
|
||||
Ops.push_back(EmitScalarExpr(E->getArg(2)));
|
||||
if(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
|
||||
BuiltinID == PPC::BI__builtin_vsx_stxvll ){
|
||||
Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
|
||||
|
@ -15434,15 +15435,13 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
// Essentially boils down to performing an unaligned VMX load sequence so
|
||||
// as to avoid crossing a page boundary and then shuffling the elements
|
||||
// into the right side of the vector register.
|
||||
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
||||
Value *Op1 = EmitScalarExpr(E->getArg(1));
|
||||
int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
|
||||
int64_t NumBytes = cast<ConstantInt>(Ops[1])->getZExtValue();
|
||||
llvm::Type *ResTy = ConvertType(E->getType());
|
||||
bool IsLE = getTarget().isLittleEndian();
|
||||
|
||||
// If the user wants the entire vector, just load the entire vector.
|
||||
if (NumBytes == 16) {
|
||||
Value *BC = Builder.CreateBitCast(Op0, ResTy->getPointerTo());
|
||||
Value *BC = Builder.CreateBitCast(Ops[0], ResTy->getPointerTo());
|
||||
Value *LD =
|
||||
Builder.CreateLoad(Address(BC, ResTy, CharUnits::fromQuantity(1)));
|
||||
if (!IsLE)
|
||||
|
@ -15460,14 +15459,16 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
: Intrinsic::ppc_altivec_lvsl);
|
||||
llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
|
||||
Value *HiMem = Builder.CreateGEP(
|
||||
Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
|
||||
Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
|
||||
Int8Ty, Ops[0], ConstantInt::get(Ops[1]->getType(), NumBytes - 1));
|
||||
Value *LoLd = Builder.CreateCall(Lvx, Ops[0], "ld.lo");
|
||||
Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
|
||||
Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
|
||||
Value *Mask1 = Builder.CreateCall(Lvs, Ops[0], "mask1");
|
||||
|
||||
Op0 = IsLE ? HiLd : LoLd;
|
||||
Op1 = IsLE ? LoLd : HiLd;
|
||||
Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
|
||||
Ops.clear();
|
||||
Ops.push_back(IsLE ? HiLd : LoLd);
|
||||
Ops.push_back(IsLE ? LoLd : HiLd);
|
||||
Ops.push_back(Mask1);
|
||||
Value *AllElts = Builder.CreateCall(Vperm, Ops, "shuffle1");
|
||||
Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
|
||||
|
||||
if (IsLE) {
|
||||
|
@ -15488,25 +15489,23 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
|
||||
}
|
||||
case PPC::BI__builtin_vsx_strmb: {
|
||||
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
||||
Value *Op1 = EmitScalarExpr(E->getArg(1));
|
||||
Value *Op2 = EmitScalarExpr(E->getArg(2));
|
||||
int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
|
||||
int64_t NumBytes = cast<ConstantInt>(Ops[1])->getZExtValue();
|
||||
bool IsLE = getTarget().isLittleEndian();
|
||||
auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
|
||||
// Storing the whole vector, simply store it on BE and reverse bytes and
|
||||
// store on LE.
|
||||
if (Width == 16) {
|
||||
Value *BC = Builder.CreateBitCast(Op0, Op2->getType()->getPointerTo());
|
||||
Value *StVec = Op2;
|
||||
Value *BC =
|
||||
Builder.CreateBitCast(Ops[0], Ops[2]->getType()->getPointerTo());
|
||||
Value *StVec = Ops[2];
|
||||
if (IsLE) {
|
||||
SmallVector<int, 16> RevMask;
|
||||
for (int Idx = 0; Idx < 16; Idx++)
|
||||
RevMask.push_back(15 - Idx);
|
||||
StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
|
||||
StVec = Builder.CreateShuffleVector(Ops[2], Ops[2], RevMask);
|
||||
}
|
||||
return Builder.CreateStore(
|
||||
StVec, Address(BC, Op2->getType(), CharUnits::fromQuantity(1)));
|
||||
StVec, Address(BC, Ops[2]->getType(), CharUnits::fromQuantity(1)));
|
||||
}
|
||||
auto *ConvTy = Int64Ty;
|
||||
unsigned NumElts = 0;
|
||||
|
@ -15531,9 +15530,9 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
break;
|
||||
}
|
||||
Value *Vec = Builder.CreateBitCast(
|
||||
Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
|
||||
Value *Ptr =
|
||||
Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
|
||||
Ops[2], llvm::FixedVectorType::get(ConvTy, NumElts));
|
||||
Value *Ptr = Builder.CreateGEP(Int8Ty, Ops[0],
|
||||
ConstantInt::get(Int64Ty, Offset));
|
||||
Value *PtrBC = Builder.CreateBitCast(Ptr, ConvTy->getPointerTo());
|
||||
Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
|
||||
if (IsLE && Width > 1) {
|
||||
|
@ -15607,20 +15606,17 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
}
|
||||
case PPC::BI__builtin_altivec_vec_replace_elt:
|
||||
case PPC::BI__builtin_altivec_vec_replace_unaligned: {
|
||||
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
||||
Value *Op1 = EmitScalarExpr(E->getArg(1));
|
||||
Value *Op2 = EmitScalarExpr(E->getArg(2));
|
||||
// The third argument of vec_replace_elt and vec_replace_unaligned must
|
||||
// be a compile time constant and will be emitted either to the vinsw
|
||||
// or vinsd instruction.
|
||||
ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
|
||||
ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
|
||||
assert(ArgCI &&
|
||||
"Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
|
||||
llvm::Type *ResultType = ConvertType(E->getType());
|
||||
llvm::Function *F = nullptr;
|
||||
Value *Call = nullptr;
|
||||
int64_t ConstArg = ArgCI->getSExtValue();
|
||||
unsigned ArgWidth = Op1->getType()->getPrimitiveSizeInBits();
|
||||
unsigned ArgWidth = Ops[1]->getType()->getPrimitiveSizeInBits();
|
||||
bool Is32Bit = false;
|
||||
assert((ArgWidth == 32 || ArgWidth == 64) && "Invalid argument width");
|
||||
// The input to vec_replace_elt is an element index, not a byte index.
|
||||
|
@ -15642,24 +15638,24 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
if (getTarget().isLittleEndian())
|
||||
ConstArg = 8 - ConstArg;
|
||||
}
|
||||
Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
|
||||
Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
|
||||
// Depending on ArgWidth, the input vector could be a float or a double.
|
||||
// If the input vector is a float type, bitcast the inputs to integers. Or,
|
||||
// if the input vector is a double, bitcast the inputs to 64-bit integers.
|
||||
if (!Op1->getType()->isIntegerTy(ArgWidth)) {
|
||||
Op0 = Builder.CreateBitCast(
|
||||
Op0, Is32Bit ? llvm::FixedVectorType::get(Int32Ty, 4)
|
||||
: llvm::FixedVectorType::get(Int64Ty, 2));
|
||||
Op1 = Builder.CreateBitCast(Op1, Is32Bit ? Int32Ty : Int64Ty);
|
||||
if (!Ops[1]->getType()->isIntegerTy(ArgWidth)) {
|
||||
Ops[0] = Builder.CreateBitCast(
|
||||
Ops[0], Is32Bit ? llvm::FixedVectorType::get(Int32Ty, 4)
|
||||
: llvm::FixedVectorType::get(Int64Ty, 2));
|
||||
Ops[1] = Builder.CreateBitCast(Ops[1], Is32Bit ? Int32Ty : Int64Ty);
|
||||
}
|
||||
// Emit the call to vinsw or vinsd.
|
||||
Call = Builder.CreateCall(F, {Op0, Op1, Op2});
|
||||
Call = Builder.CreateCall(F, Ops);
|
||||
// Depending on the builtin, bitcast to the appropriate result type.
|
||||
if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt &&
|
||||
!Op1->getType()->isIntegerTy())
|
||||
!Ops[1]->getType()->isIntegerTy())
|
||||
return Builder.CreateBitCast(Call, ResultType);
|
||||
else if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt &&
|
||||
Op1->getType()->isIntegerTy())
|
||||
Ops[1]->getType()->isIntegerTy())
|
||||
return Call;
|
||||
else
|
||||
return Builder.CreateBitCast(Call,
|
||||
|
@ -15676,15 +15672,15 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
}
|
||||
case PPC::BI__builtin_altivec_vadduqm:
|
||||
case PPC::BI__builtin_altivec_vsubuqm: {
|
||||
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
||||
Value *Op1 = EmitScalarExpr(E->getArg(1));
|
||||
llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
|
||||
Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
|
||||
Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
|
||||
Ops[0] =
|
||||
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int128Ty, 1));
|
||||
Ops[1] =
|
||||
Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int128Ty, 1));
|
||||
if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
|
||||
return Builder.CreateAdd(Op0, Op1, "vadduqm");
|
||||
return Builder.CreateAdd(Ops[0], Ops[1], "vadduqm");
|
||||
else
|
||||
return Builder.CreateSub(Op0, Op1, "vsubuqm");
|
||||
return Builder.CreateSub(Ops[0], Ops[1], "vsubuqm");
|
||||
}
|
||||
// Rotate and insert under mask operation.
|
||||
// __rldimi(rs, is, shift, mask)
|
||||
|
@ -15693,37 +15689,29 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
// (rotl(rs, shift) & mask) | (is & ~mask)
|
||||
case PPC::BI__builtin_ppc_rldimi:
|
||||
case PPC::BI__builtin_ppc_rlwimi: {
|
||||
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
||||
Value *Op1 = EmitScalarExpr(E->getArg(1));
|
||||
Value *Op2 = EmitScalarExpr(E->getArg(2));
|
||||
Value *Op3 = EmitScalarExpr(E->getArg(3));
|
||||
llvm::Type *Ty = Op0->getType();
|
||||
llvm::Type *Ty = Ops[0]->getType();
|
||||
Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
|
||||
if (BuiltinID == PPC::BI__builtin_ppc_rldimi)
|
||||
Op2 = Builder.CreateZExt(Op2, Int64Ty);
|
||||
Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
|
||||
Value *X = Builder.CreateAnd(Shift, Op3);
|
||||
Value *Y = Builder.CreateAnd(Op1, Builder.CreateNot(Op3));
|
||||
Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
|
||||
Value *Shift = Builder.CreateCall(F, {Ops[0], Ops[0], Ops[2]});
|
||||
Value *X = Builder.CreateAnd(Shift, Ops[3]);
|
||||
Value *Y = Builder.CreateAnd(Ops[1], Builder.CreateNot(Ops[3]));
|
||||
return Builder.CreateOr(X, Y);
|
||||
}
|
||||
// Rotate and AND with mask operation.
|
||||
// __rlwnm(rs, shift, mask)
|
||||
// rotl(rs, shift) & mask
|
||||
case PPC::BI__builtin_ppc_rlwnm: {
|
||||
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
||||
Value *Op1 = EmitScalarExpr(E->getArg(1));
|
||||
Value *Op2 = EmitScalarExpr(E->getArg(2));
|
||||
llvm::Type *Ty = Op0->getType();
|
||||
llvm::Type *Ty = Ops[0]->getType();
|
||||
Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
|
||||
Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op1});
|
||||
return Builder.CreateAnd(Shift, Op2);
|
||||
Value *Shift = Builder.CreateCall(F, {Ops[0], Ops[0], Ops[1]});
|
||||
return Builder.CreateAnd(Shift, Ops[2]);
|
||||
}
|
||||
case PPC::BI__builtin_ppc_poppar4:
|
||||
case PPC::BI__builtin_ppc_poppar8: {
|
||||
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
||||
llvm::Type *ArgType = Op0->getType();
|
||||
llvm::Type *ArgType = Ops[0]->getType();
|
||||
Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
|
||||
Value *Tmp = Builder.CreateCall(F, Op0);
|
||||
Value *Tmp = Builder.CreateCall(F, Ops[0]);
|
||||
|
||||
llvm::Type *ResultType = ConvertType(E->getType());
|
||||
Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
|
||||
|
@ -15733,12 +15721,10 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
return Result;
|
||||
}
|
||||
case PPC::BI__builtin_ppc_cmpb: {
|
||||
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
||||
Value *Op1 = EmitScalarExpr(E->getArg(1));
|
||||
if (getTarget().getTriple().isPPC64()) {
|
||||
Function *F =
|
||||
CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
|
||||
return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
|
||||
return Builder.CreateCall(F, Ops, "cmpb");
|
||||
}
|
||||
// For 32 bit, emit the code as below:
|
||||
// %conv = trunc i64 %a to i32
|
||||
|
@ -15756,13 +15742,13 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
// ret i64 %or
|
||||
Function *F =
|
||||
CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
|
||||
Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
|
||||
Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
|
||||
Value *ArgOneLo = Builder.CreateTrunc(Ops[0], Int32Ty);
|
||||
Value *ArgTwoLo = Builder.CreateTrunc(Ops[1], Int32Ty);
|
||||
Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
|
||||
Value *ArgOneHi =
|
||||
Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
|
||||
Builder.CreateTrunc(Builder.CreateLShr(Ops[0], ShiftAmt), Int32Ty);
|
||||
Value *ArgTwoHi =
|
||||
Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
|
||||
Builder.CreateTrunc(Builder.CreateLShr(Ops[1], ShiftAmt), Int32Ty);
|
||||
Value *ResLo = Builder.CreateZExt(
|
||||
Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
|
||||
Value *ResHiShift = Builder.CreateZExt(
|
||||
|
@ -15856,32 +15842,27 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
return FDiv;
|
||||
}
|
||||
case PPC::BI__builtin_ppc_alignx: {
|
||||
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
||||
Value *Op1 = EmitScalarExpr(E->getArg(1));
|
||||
ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
|
||||
ConstantInt *AlignmentCI = cast<ConstantInt>(Ops[0]);
|
||||
if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
|
||||
AlignmentCI = ConstantInt::get(AlignmentCI->getType(),
|
||||
llvm::Value::MaximumAlignment);
|
||||
|
||||
emitAlignmentAssumption(Op1, E->getArg(1),
|
||||
emitAlignmentAssumption(Ops[1], E->getArg(1),
|
||||
/*The expr loc is sufficient.*/ SourceLocation(),
|
||||
AlignmentCI, nullptr);
|
||||
return Op1;
|
||||
return Ops[1];
|
||||
}
|
||||
case PPC::BI__builtin_ppc_rdlam: {
|
||||
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
||||
Value *Op1 = EmitScalarExpr(E->getArg(1));
|
||||
Value *Op2 = EmitScalarExpr(E->getArg(2));
|
||||
llvm::Type *Ty = Op0->getType();
|
||||
Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
|
||||
llvm::Type *Ty = Ops[0]->getType();
|
||||
Value *ShiftAmt = Builder.CreateIntCast(Ops[1], Ty, false);
|
||||
Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
|
||||
Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
|
||||
return Builder.CreateAnd(Rotate, Op2);
|
||||
Value *Rotate = Builder.CreateCall(F, {Ops[0], Ops[0], ShiftAmt});
|
||||
return Builder.CreateAnd(Rotate, Ops[2]);
|
||||
}
|
||||
case PPC::BI__builtin_ppc_load2r: {
|
||||
Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
|
||||
Value *Op0 = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
|
||||
Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
|
||||
Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
|
||||
Value *LoadIntrinsic = Builder.CreateCall(F, Ops);
|
||||
return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
|
||||
}
|
||||
// FMA variations
|
||||
|
@ -15943,14 +15924,11 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
}
|
||||
|
||||
case PPC::BI__builtin_vsx_insertword: {
|
||||
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
||||
Value *Op1 = EmitScalarExpr(E->getArg(1));
|
||||
Value *Op2 = EmitScalarExpr(E->getArg(2));
|
||||
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
|
||||
|
||||
// Third argument is a compile time constant int. It must be clamped to
|
||||
// the range [0, 12].
|
||||
ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
|
||||
ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
|
||||
assert(ArgCI &&
|
||||
"Third arg to xxinsertw intrinsic must be constant integer");
|
||||
const int64_t MaxIndex = 12;
|
||||
|
@ -15961,38 +15939,40 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
// word from the first argument, and inserts it in the second argument. The
|
||||
// instruction extracts the word from its second input register and inserts
|
||||
// it into its first input register, so swap the first and second arguments.
|
||||
std::swap(Op0, Op1);
|
||||
std::swap(Ops[0], Ops[1]);
|
||||
|
||||
// Need to cast the second argument from a vector of unsigned int to a
|
||||
// vector of long long.
|
||||
Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
|
||||
Ops[1] =
|
||||
Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int64Ty, 2));
|
||||
|
||||
if (getTarget().isLittleEndian()) {
|
||||
// Reverse the double words in the vector we will extract from.
|
||||
Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
|
||||
Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
|
||||
Ops[0] =
|
||||
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
|
||||
Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ArrayRef<int>{1, 0});
|
||||
|
||||
// Reverse the index.
|
||||
Index = MaxIndex - Index;
|
||||
}
|
||||
|
||||
// Intrinsic expects the first arg to be a vector of int.
|
||||
Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
|
||||
Op2 = ConstantInt::getSigned(Int32Ty, Index);
|
||||
return Builder.CreateCall(F, {Op0, Op1, Op2});
|
||||
Ops[0] =
|
||||
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
|
||||
Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
|
||||
return Builder.CreateCall(F, Ops);
|
||||
}
|
||||
|
||||
case PPC::BI__builtin_vsx_extractuword: {
|
||||
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
||||
Value *Op1 = EmitScalarExpr(E->getArg(1));
|
||||
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
|
||||
|
||||
// Intrinsic expects the first argument to be a vector of doublewords.
|
||||
Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
|
||||
Ops[0] =
|
||||
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
|
||||
|
||||
// The second argument is a compile time constant int that needs to
|
||||
// be clamped to the range [0, 12].
|
||||
ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
|
||||
ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
|
||||
assert(ArgCI &&
|
||||
"Second Arg to xxextractuw intrinsic must be a constant integer!");
|
||||
const int64_t MaxIndex = 12;
|
||||
|
@ -16001,30 +15981,29 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
if (getTarget().isLittleEndian()) {
|
||||
// Reverse the index.
|
||||
Index = MaxIndex - Index;
|
||||
Op1 = ConstantInt::getSigned(Int32Ty, Index);
|
||||
Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
|
||||
|
||||
// Emit the call, then reverse the double words of the results vector.
|
||||
Value *Call = Builder.CreateCall(F, {Op0, Op1});
|
||||
Value *Call = Builder.CreateCall(F, Ops);
|
||||
|
||||
Value *ShuffleCall =
|
||||
Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
|
||||
return ShuffleCall;
|
||||
} else {
|
||||
Op1 = ConstantInt::getSigned(Int32Ty, Index);
|
||||
return Builder.CreateCall(F, {Op0, Op1});
|
||||
Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
|
||||
return Builder.CreateCall(F, Ops);
|
||||
}
|
||||
}
|
||||
|
||||
case PPC::BI__builtin_vsx_xxpermdi: {
|
||||
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
||||
Value *Op1 = EmitScalarExpr(E->getArg(1));
|
||||
Value *Op2 = EmitScalarExpr(E->getArg(2));
|
||||
ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
|
||||
ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
|
||||
assert(ArgCI && "Third arg must be constant integer!");
|
||||
|
||||
unsigned Index = ArgCI->getZExtValue();
|
||||
Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
|
||||
Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
|
||||
Ops[0] =
|
||||
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
|
||||
Ops[1] =
|
||||
Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int64Ty, 2));
|
||||
|
||||
// Account for endianness by treating this as just a shuffle. So we use the
|
||||
// same indices for both LE and BE in order to produce expected results in
|
||||
|
@ -16033,21 +16012,21 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
int ElemIdx1 = 2 + (Index & 1);
|
||||
|
||||
int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
|
||||
Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
|
||||
Value *ShuffleCall =
|
||||
Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleElts);
|
||||
QualType BIRetType = E->getType();
|
||||
auto RetTy = ConvertType(BIRetType);
|
||||
return Builder.CreateBitCast(ShuffleCall, RetTy);
|
||||
}
|
||||
|
||||
case PPC::BI__builtin_vsx_xxsldwi: {
|
||||
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
||||
Value *Op1 = EmitScalarExpr(E->getArg(1));
|
||||
Value *Op2 = EmitScalarExpr(E->getArg(2));
|
||||
ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
|
||||
ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
|
||||
assert(ArgCI && "Third argument must be a compile time constant");
|
||||
unsigned Index = ArgCI->getZExtValue() & 0x3;
|
||||
Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
|
||||
Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
|
||||
Ops[0] =
|
||||
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
|
||||
Ops[1] =
|
||||
Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int32Ty, 4));
|
||||
|
||||
// Create a shuffle mask
|
||||
int ElemIdx0;
|
||||
|
@ -16071,31 +16050,28 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
}
|
||||
|
||||
int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
|
||||
Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
|
||||
Value *ShuffleCall =
|
||||
Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleElts);
|
||||
QualType BIRetType = E->getType();
|
||||
auto RetTy = ConvertType(BIRetType);
|
||||
return Builder.CreateBitCast(ShuffleCall, RetTy);
|
||||
}
|
||||
|
||||
case PPC::BI__builtin_pack_vector_int128: {
|
||||
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
||||
Value *Op1 = EmitScalarExpr(E->getArg(1));
|
||||
bool isLittleEndian = getTarget().isLittleEndian();
|
||||
Value *UndefValue =
|
||||
llvm::UndefValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
|
||||
llvm::UndefValue::get(llvm::FixedVectorType::get(Ops[0]->getType(), 2));
|
||||
Value *Res = Builder.CreateInsertElement(
|
||||
UndefValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
|
||||
Res = Builder.CreateInsertElement(Res, Op1,
|
||||
UndefValue, Ops[0], (uint64_t)(isLittleEndian ? 1 : 0));
|
||||
Res = Builder.CreateInsertElement(Res, Ops[1],
|
||||
(uint64_t)(isLittleEndian ? 0 : 1));
|
||||
return Builder.CreateBitCast(Res, ConvertType(E->getType()));
|
||||
}
|
||||
|
||||
case PPC::BI__builtin_unpack_vector_int128: {
|
||||
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
||||
Value *Op1 = EmitScalarExpr(E->getArg(1));
|
||||
ConstantInt *Index = cast<ConstantInt>(Op1);
|
||||
ConstantInt *Index = cast<ConstantInt>(Ops[1]);
|
||||
Value *Unpacked = Builder.CreateBitCast(
|
||||
Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
|
||||
Ops[0], llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
|
||||
|
||||
if (getTarget().isLittleEndian())
|
||||
Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue());
|
||||
|
@ -16105,9 +16081,9 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
|
||||
case PPC::BI__builtin_ppc_sthcx: {
|
||||
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
|
||||
Value *Op0 = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
|
||||
Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
|
||||
return Builder.CreateCall(F, {Op0, Op1});
|
||||
Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
|
||||
Ops[1] = Builder.CreateSExt(Ops[1], Int32Ty);
|
||||
return Builder.CreateCall(F, Ops);
|
||||
}
|
||||
|
||||
// The PPC MMA builtins take a pointer to a __vector_quad as an argument.
|
||||
|
@ -16120,12 +16096,6 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
case PPC::BI__builtin_##Name:
|
||||
#include "clang/Basic/BuiltinsPPC.def"
|
||||
{
|
||||
SmallVector<Value *, 4> Ops;
|
||||
for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
|
||||
if (E->getArg(i)->getType()->isArrayType())
|
||||
Ops.push_back(EmitArrayToPointerDecay(E->getArg(i)).getPointer());
|
||||
else
|
||||
Ops.push_back(EmitScalarExpr(E->getArg(i)));
|
||||
// The first argument of these two builtins is a pointer used to store their
|
||||
// result. However, the llvm intrinsics return their result in multiple
|
||||
// return values. So, here we emit code extracting these values from the
|
||||
|
@ -16210,9 +16180,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
|
||||
LValue LV = MakeAddrLValue(Addr, AtomicTy);
|
||||
auto Pair = EmitAtomicCompareExchange(
|
||||
LV, RValue::get(OldVal), RValue::get(EmitScalarExpr(E->getArg(2))),
|
||||
E->getExprLoc(), llvm::AtomicOrdering::Monotonic,
|
||||
llvm::AtomicOrdering::Monotonic, true);
|
||||
LV, RValue::get(OldVal), RValue::get(Ops[2]), E->getExprLoc(),
|
||||
llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
|
||||
// Unlike C11's atomic_compare_exchange, according to
|
||||
// https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
|
||||
// > In either case, the contents of the memory location specified by addr
|
||||
|
@ -16255,37 +16224,34 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
? Int32Ty
|
||||
: Int64Ty;
|
||||
Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
|
||||
return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
|
||||
return Builder.CreateCall(F, Ops);
|
||||
}
|
||||
case PPC::BI__builtin_ppc_mtspr: {
|
||||
llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
|
||||
? Int32Ty
|
||||
: Int64Ty;
|
||||
Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
|
||||
return Builder.CreateCall(
|
||||
F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
|
||||
return Builder.CreateCall(F, Ops);
|
||||
}
|
||||
case PPC::BI__builtin_ppc_popcntb: {
|
||||
Value *ArgValue = EmitScalarExpr(E->getArg(0));
|
||||
llvm::Type *ArgType = ArgValue->getType();
|
||||
Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
|
||||
return Builder.CreateCall(F, {ArgValue}, "popcntb");
|
||||
return Builder.CreateCall(F, Ops, "popcntb");
|
||||
}
|
||||
case PPC::BI__builtin_ppc_mtfsf: {
|
||||
// The builtin takes a uint32 that needs to be cast to an
|
||||
// f64 to be passed to the intrinsic.
|
||||
Value *Cast = Builder.CreateUIToFP(EmitScalarExpr(E->getArg(1)), DoubleTy);
|
||||
Value *Cast = Builder.CreateUIToFP(Ops[1], DoubleTy);
|
||||
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
|
||||
return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0)), Cast}, "");
|
||||
return Builder.CreateCall(F, {Ops[0], Cast}, "");
|
||||
}
|
||||
|
||||
case PPC::BI__builtin_ppc_swdiv_nochk:
|
||||
case PPC::BI__builtin_ppc_swdivs_nochk: {
|
||||
FastMathFlags FMF = Builder.getFastMathFlags();
|
||||
Builder.getFastMathFlags().setFast();
|
||||
Value *FDiv =
|
||||
Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
|
||||
EmitScalarExpr(E->getArg(1)), "swdiv_nochk");
|
||||
Value *FDiv = Builder.CreateFDiv(Ops[0], Ops[1], "swdiv_nochk");
|
||||
Builder.getFastMathFlags() &= (FMF);
|
||||
return FDiv;
|
||||
}
|
||||
|
@ -16325,9 +16291,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
Intrinsic::experimental_constrained_sqrt))
|
||||
.getScalarVal();
|
||||
case PPC::BI__builtin_ppc_test_data_class: {
|
||||
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
||||
Value *Op1 = EmitScalarExpr(E->getArg(1));
|
||||
llvm::Type *ArgType = Op0->getType();
|
||||
llvm::Type *ArgType = EmitScalarExpr(E->getArg(0))->getType();
|
||||
unsigned IntrinsicID;
|
||||
if (ArgType->isDoubleTy())
|
||||
IntrinsicID = Intrinsic::ppc_test_data_class_d;
|
||||
|
@ -16335,43 +16299,24 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
IntrinsicID = Intrinsic::ppc_test_data_class_f;
|
||||
else
|
||||
llvm_unreachable("Invalid Argument Type");
|
||||
return Builder.CreateCall(CGM.getIntrinsic(IntrinsicID), {Op0, Op1},
|
||||
return Builder.CreateCall(CGM.getIntrinsic(IntrinsicID), Ops,
|
||||
"test_data_class");
|
||||
}
|
||||
case PPC::BI__builtin_ppc_maxfe:
|
||||
return Builder.CreateCall(
|
||||
CGM.getIntrinsic(Intrinsic::ppc_maxfe),
|
||||
{EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
|
||||
EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
|
||||
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe), Ops);
|
||||
case PPC::BI__builtin_ppc_maxfl:
|
||||
return Builder.CreateCall(
|
||||
CGM.getIntrinsic(Intrinsic::ppc_maxfl),
|
||||
{EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
|
||||
EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
|
||||
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl), Ops);
|
||||
case PPC::BI__builtin_ppc_maxfs:
|
||||
return Builder.CreateCall(
|
||||
CGM.getIntrinsic(Intrinsic::ppc_maxfs),
|
||||
{EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
|
||||
EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
|
||||
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs), Ops);
|
||||
case PPC::BI__builtin_ppc_minfe:
|
||||
return Builder.CreateCall(
|
||||
CGM.getIntrinsic(Intrinsic::ppc_minfe),
|
||||
{EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
|
||||
EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
|
||||
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe), Ops);
|
||||
case PPC::BI__builtin_ppc_minfl:
|
||||
return Builder.CreateCall(
|
||||
CGM.getIntrinsic(Intrinsic::ppc_minfl),
|
||||
{EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
|
||||
EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
|
||||
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl), Ops);
|
||||
case PPC::BI__builtin_ppc_minfs:
|
||||
return Builder.CreateCall(
|
||||
CGM.getIntrinsic(Intrinsic::ppc_minfs),
|
||||
{EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
|
||||
EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
|
||||
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs), Ops);
|
||||
case PPC::BI__builtin_ppc_swdiv:
|
||||
case PPC::BI__builtin_ppc_swdivs:
|
||||
return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
|
||||
EmitScalarExpr(E->getArg(1)), "swdiv");
|
||||
return Builder.CreateFDiv(Ops[0], Ops[1], "swdiv");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -18,9 +18,11 @@ extern vector double f;
|
|||
// CHECK-LABEL: @test_flags_recipdivf(
|
||||
// CHECK: [[TMP0:%.*]] = load <4 x float>, <4 x float>* @a, align 16
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* @b, align 16
|
||||
// CHECK-NEXT: [[RECIPDIV:%.*]] = fdiv fast <4 x float> [[TMP0]], [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* @c, align 16
|
||||
// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[RECIPDIV]], [[TMP2]]
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* @a, align 16
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* @b, align 16
|
||||
// CHECK-NEXT: [[RECIPDIV:%.*]] = fdiv fast <4 x float> [[TMP2]], [[TMP3]]
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* @c, align 16
|
||||
// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[RECIPDIV]], [[TMP4]]
|
||||
// CHECK-NEXT: ret <4 x float> [[ADD]]
|
||||
//
|
||||
vector float test_flags_recipdivf() {
|
||||
|
@ -30,9 +32,11 @@ vector float test_flags_recipdivf() {
|
|||
// CHECK-LABEL: @test_flags_recipdivd(
|
||||
// CHECK: [[TMP0:%.*]] = load <2 x double>, <2 x double>* @d, align 16
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* @e, align 16
|
||||
// CHECK-NEXT: [[RECIPDIV:%.*]] = fdiv fast <2 x double> [[TMP0]], [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* @f, align 16
|
||||
// CHECK-NEXT: [[ADD:%.*]] = fadd <2 x double> [[RECIPDIV]], [[TMP2]]
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* @d, align 16
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* @e, align 16
|
||||
// CHECK-NEXT: [[RECIPDIV:%.*]] = fdiv fast <2 x double> [[TMP2]], [[TMP3]]
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* @f, align 16
|
||||
// CHECK-NEXT: [[ADD:%.*]] = fadd <2 x double> [[RECIPDIV]], [[TMP4]]
|
||||
// CHECK-NEXT: ret <2 x double> [[ADD]]
|
||||
//
|
||||
vector double test_flags_recipdivd() {
|
||||
|
@ -41,10 +45,11 @@ vector double test_flags_recipdivd() {
|
|||
|
||||
// CHECK-LABEL: @test_flags_rsqrtf(
|
||||
// CHECK: [[TMP0:%.*]] = load <4 x float>, <4 x float>* @a, align 16
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
|
||||
// CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* @b, align 16
|
||||
// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[RSQRT]], [[TMP2]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* @a, align 16
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP1]])
|
||||
// CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[TMP2]]
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* @b, align 16
|
||||
// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[RSQRT]], [[TMP3]]
|
||||
// CHECK-NEXT: ret <4 x float> [[ADD]]
|
||||
//
|
||||
vector float test_flags_rsqrtf() {
|
||||
|
@ -53,10 +58,11 @@ vector float test_flags_rsqrtf() {
|
|||
|
||||
// CHECK-LABEL: @test_flags_rsqrtd(
|
||||
// CHECK: [[TMP0:%.*]] = load <2 x double>, <2 x double>* @d, align 16
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP0]])
|
||||
// CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* @e, align 16
|
||||
// CHECK-NEXT: [[ADD:%.*]] = fadd <2 x double> [[RSQRT]], [[TMP2]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* @d, align 16
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP1]])
|
||||
// CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP2]]
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* @e, align 16
|
||||
// CHECK-NEXT: [[ADD:%.*]] = fadd <2 x double> [[RSQRT]], [[TMP3]]
|
||||
// CHECK-NEXT: ret <2 x double> [[ADD]]
|
||||
//
|
||||
vector double test_flags_rsqrtd() {
|
||||
|
|
|
@ -1,259 +0,0 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||
// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
|
||||
// RUN: -emit-llvm %s -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -target-cpu pwr10 \
|
||||
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-BE
|
||||
|
||||
// CHECK-LABEL: @testVQLocal(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[VC_ADDR:%.*]] = alloca <16 x i8>, align 16
|
||||
// CHECK-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-NEXT: [[VQ1:%.*]] = alloca <512 x i1>, align 64
|
||||
// CHECK-NEXT: [[VQ2:%.*]] = alloca <512 x i1>, align 64
|
||||
// CHECK-NEXT: [[VQ3:%.*]] = alloca <512 x i1>, align 64
|
||||
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: store <16 x i8> [[VC:%.*]], <16 x i8>* [[VC_ADDR]], align 16
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
|
||||
// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[TMP2]], align 64
|
||||
// CHECK-NEXT: store <512 x i1> [[TMP3]], <512 x i1>* [[VQ1]], align 64
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
|
||||
// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[VQ2]], align 64
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
|
||||
// CHECK-NEXT: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
|
||||
// CHECK-NEXT: [[TMP7:%.*]] = call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[TMP5]], <16 x i8> [[TMP6]])
|
||||
// CHECK-NEXT: store <512 x i1> [[TMP7]], <512 x i1>* [[VQ3]], align 64
|
||||
// CHECK-NEXT: [[TMP8:%.*]] = load <512 x i1>, <512 x i1>* [[VQ3]], align 64
|
||||
// CHECK-NEXT: [[TMP9:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-NEXT: store <512 x i1> [[TMP8]], <512 x i1>* [[TMP9]], align 64
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// CHECK-BE-LABEL: @testVQLocal(
|
||||
// CHECK-BE-NEXT: entry:
|
||||
// CHECK-BE-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-BE-NEXT: [[VC_ADDR:%.*]] = alloca <16 x i8>, align 16
|
||||
// CHECK-BE-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-BE-NEXT: [[VQ1:%.*]] = alloca <512 x i1>, align 64
|
||||
// CHECK-BE-NEXT: [[VQ2:%.*]] = alloca <512 x i1>, align 64
|
||||
// CHECK-BE-NEXT: [[VQ3:%.*]] = alloca <512 x i1>, align 64
|
||||
// CHECK-BE-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-BE-NEXT: store <16 x i8> [[VC:%.*]], <16 x i8>* [[VC_ADDR]], align 16
|
||||
// CHECK-BE-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
|
||||
// CHECK-BE-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-BE-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-BE-NEXT: [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[TMP2]], align 64
|
||||
// CHECK-BE-NEXT: store <512 x i1> [[TMP3]], <512 x i1>* [[VQ1]], align 64
|
||||
// CHECK-BE-NEXT: [[TMP4:%.*]] = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
|
||||
// CHECK-BE-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[VQ2]], align 64
|
||||
// CHECK-BE-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
|
||||
// CHECK-BE-NEXT: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
|
||||
// CHECK-BE-NEXT: [[TMP7:%.*]] = call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[TMP5]], <16 x i8> [[TMP6]])
|
||||
// CHECK-BE-NEXT: store <512 x i1> [[TMP7]], <512 x i1>* [[VQ3]], align 64
|
||||
// CHECK-BE-NEXT: [[TMP8:%.*]] = load <512 x i1>, <512 x i1>* [[VQ3]], align 64
|
||||
// CHECK-BE-NEXT: [[TMP9:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-BE-NEXT: store <512 x i1> [[TMP8]], <512 x i1>* [[TMP9]], align 64
|
||||
// CHECK-BE-NEXT: ret void
|
||||
//
|
||||
// Codegen test body for __vector_quad locals: reads a quad through a cast
// pointer, produces two more quads with MMA builtins, and writes one back.
// The expected-IR assertions above pin the exact instruction sequence, so the
// statements below must stay in this order.
void testVQLocal(int *ptr, vector unsigned char vc) {
|
||||
// Reinterpret the incoming int pointer as a pointer to a vector quad.
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
// Copy the pointed-to quad into a local (a load followed by a store to vq1
// in the expected IR).
__vector_quad vq1 = *vqp;
|
||||
__vector_quad vq2;
|
||||
// Expected IR: a call to @llvm.ppc.mma.xxsetaccz whose result is stored to vq2.
__builtin_mma_xxsetaccz(&vq2);
|
||||
__vector_quad vq3;
|
||||
// vc is passed twice; the expected IR has one load of vc per argument and a
// single call to @llvm.ppc.mma.xvi4ger8 whose result is stored to vq3.
__builtin_mma_xvi4ger8(&vq3, vc, vc);
|
||||
// Write the computed quad back through the original pointer.
*vqp = vq3;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @testVPLocal(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[VC_ADDR:%.*]] = alloca <16 x i8>, align 16
|
||||
// CHECK-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8
|
||||
// CHECK-NEXT: [[VP1:%.*]] = alloca <256 x i1>, align 32
|
||||
// CHECK-NEXT: [[VP2:%.*]] = alloca <256 x i1>, align 32
|
||||
// CHECK-NEXT: [[VP3:%.*]] = alloca <256 x i1>, align 32
|
||||
// CHECK-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64
|
||||
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: store <16 x i8> [[VC:%.*]], <16 x i8>* [[VC_ADDR]], align 16
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
|
||||
// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32
|
||||
// CHECK-NEXT: store <256 x i1> [[TMP3]], <256 x i1>* [[VP1]], align 32
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
|
||||
// CHECK-NEXT: [[TMP6:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
|
||||
// CHECK-NEXT: store <256 x i1> [[TMP6]], <256 x i1>* [[VP2]], align 64
|
||||
// CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
|
||||
// CHECK-NEXT: [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
|
||||
// CHECK-NEXT: [[TMP9:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP8]], <16 x i8> [[TMP7]])
|
||||
// CHECK-NEXT: store <256 x i1> [[TMP9]], <256 x i1>* [[VP2]], align 64
|
||||
// CHECK-NEXT: [[TMP10:%.*]] = load <256 x i1>, <256 x i1>* [[VP3]], align 32
|
||||
// CHECK-NEXT: [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
|
||||
// CHECK-NEXT: [[TMP12:%.*]] = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP10]], <16 x i8> [[TMP11]])
|
||||
// CHECK-NEXT: store <512 x i1> [[TMP12]], <512 x i1>* [[VQ]], align 64
|
||||
// CHECK-NEXT: [[TMP13:%.*]] = load <256 x i1>, <256 x i1>* [[VP3]], align 32
|
||||
// CHECK-NEXT: [[TMP14:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-NEXT: store <256 x i1> [[TMP13]], <256 x i1>* [[TMP14]], align 32
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// CHECK-BE-LABEL: @testVPLocal(
|
||||
// CHECK-BE-NEXT: entry:
|
||||
// CHECK-BE-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-BE-NEXT: [[VC_ADDR:%.*]] = alloca <16 x i8>, align 16
|
||||
// CHECK-BE-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8
|
||||
// CHECK-BE-NEXT: [[VP1:%.*]] = alloca <256 x i1>, align 32
|
||||
// CHECK-BE-NEXT: [[VP2:%.*]] = alloca <256 x i1>, align 32
|
||||
// CHECK-BE-NEXT: [[VP3:%.*]] = alloca <256 x i1>, align 32
|
||||
// CHECK-BE-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64
|
||||
// CHECK-BE-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-BE-NEXT: store <16 x i8> [[VC:%.*]], <16 x i8>* [[VC_ADDR]], align 16
|
||||
// CHECK-BE-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
|
||||
// CHECK-BE-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-BE-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-BE-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32
|
||||
// CHECK-BE-NEXT: store <256 x i1> [[TMP3]], <256 x i1>* [[VP1]], align 32
|
||||
// CHECK-BE-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
|
||||
// CHECK-BE-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
|
||||
// CHECK-BE-NEXT: [[TMP6:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
|
||||
// CHECK-BE-NEXT: store <256 x i1> [[TMP6]], <256 x i1>* [[VP2]], align 64
|
||||
// CHECK-BE-NEXT: [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
|
||||
// CHECK-BE-NEXT: [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
|
||||
// CHECK-BE-NEXT: [[TMP9:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
|
||||
// CHECK-BE-NEXT: store <256 x i1> [[TMP9]], <256 x i1>* [[VP2]], align 64
|
||||
// CHECK-BE-NEXT: [[TMP10:%.*]] = load <256 x i1>, <256 x i1>* [[VP3]], align 32
|
||||
// CHECK-BE-NEXT: [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
|
||||
// CHECK-BE-NEXT: [[TMP12:%.*]] = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP10]], <16 x i8> [[TMP11]])
|
||||
// CHECK-BE-NEXT: store <512 x i1> [[TMP12]], <512 x i1>* [[VQ]], align 64
|
||||
// CHECK-BE-NEXT: [[TMP13:%.*]] = load <256 x i1>, <256 x i1>* [[VP3]], align 32
|
||||
// CHECK-BE-NEXT: [[TMP14:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-BE-NEXT: store <256 x i1> [[TMP13]], <256 x i1>* [[TMP14]], align 32
|
||||
// CHECK-BE-NEXT: ret void
|
||||
//
|
||||
// Codegen test body for __vector_pair locals: reads a pair through a cast
// pointer, builds pairs with the VSX pair builtins, feeds a pair to an MMA
// builtin, and writes a pair back. The expected-IR assertions above pin the
// exact instruction sequence, so the statements below must stay in this order.
void testVPLocal(int *ptr, vector unsigned char vc) {
|
||||
// Reinterpret the incoming int pointer as a pointer to a vector pair.
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
__vector_pair vp1 = *vpp;
|
||||
__vector_pair vp2;
|
||||
// Expected IR: two loads of vc feeding @llvm.ppc.vsx.assemble.pair, with the
// result stored to vp2.
__builtin_vsx_assemble_pair(&vp2, vc, vc);
|
||||
// Expected IR also uses @llvm.ppc.vsx.assemble.pair for this builtin. The
// little-endian expectations list the two loaded operands in swapped order,
// while the big-endian (CHECK-BE) expectations keep them in load order.
__builtin_vsx_build_pair(&vp2, vc, vc);
|
||||
// vp3 is declared without an initializer and is read by the call below.
__vector_pair vp3;
|
||||
__vector_quad vq;
|
||||
// Expected IR: one load of vp3 and one load of vc feeding
// @llvm.ppc.mma.xvf64ger, with the result stored to vq.
__builtin_mma_xvf64ger(&vq, vp3, vc);
|
||||
// Write vp3 back through the original pointer.
*vpp = vp3;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @testRestrictQualifiedPointer2(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[ACC_ADDR:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-NEXT: [[ARR:%.*]] = alloca [4 x <4 x float>], align 16
|
||||
// CHECK-NEXT: store <512 x i1>* [[ACC:%.*]], <512 x i1>** [[ACC_ADDR]], align 8
|
||||
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[ARR]], i64 0, i64 0
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>, <512 x i1>* [[TMP1]], align 64
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]])
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float>* [[ARRAYDECAY]] to <16 x i8>*
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0
|
||||
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 0
|
||||
// CHECK-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* [[TMP6]], align 16
|
||||
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1
|
||||
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 1
|
||||
// CHECK-NEXT: store <16 x i8> [[TMP7]], <16 x i8>* [[TMP8]], align 16
|
||||
// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2
|
||||
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 2
|
||||
// CHECK-NEXT: store <16 x i8> [[TMP9]], <16 x i8>* [[TMP10]], align 16
|
||||
// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3
|
||||
// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 3
|
||||
// CHECK-NEXT: store <16 x i8> [[TMP11]], <16 x i8>* [[TMP12]], align 16
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// CHECK-BE-LABEL: @testRestrictQualifiedPointer2(
|
||||
// CHECK-BE-NEXT: entry:
|
||||
// CHECK-BE-NEXT: [[ACC_ADDR:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-BE-NEXT: [[ARR:%.*]] = alloca [4 x <4 x float>], align 16
|
||||
// CHECK-BE-NEXT: store <512 x i1>* [[ACC:%.*]], <512 x i1>** [[ACC_ADDR]], align 8
|
||||
// CHECK-BE-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[ARR]], i64 0, i64 0
|
||||
// CHECK-BE-NEXT: [[TMP0:%.*]] = load <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8
|
||||
// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8
|
||||
// CHECK-BE-NEXT: [[TMP2:%.*]] = load <512 x i1>, <512 x i1>* [[TMP1]], align 64
|
||||
// CHECK-BE-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]])
|
||||
// CHECK-BE-NEXT: [[TMP4:%.*]] = bitcast <4 x float>* [[ARRAYDECAY]] to <16 x i8>*
|
||||
// CHECK-BE-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0
|
||||
// CHECK-BE-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 0
|
||||
// CHECK-BE-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* [[TMP6]], align 16
|
||||
// CHECK-BE-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1
|
||||
// CHECK-BE-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 1
|
||||
// CHECK-BE-NEXT: store <16 x i8> [[TMP7]], <16 x i8>* [[TMP8]], align 16
|
||||
// CHECK-BE-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2
|
||||
// CHECK-BE-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 2
|
||||
// CHECK-BE-NEXT: store <16 x i8> [[TMP9]], <16 x i8>* [[TMP10]], align 16
|
||||
// CHECK-BE-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3
|
||||
// CHECK-BE-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 3
|
||||
// CHECK-BE-NEXT: store <16 x i8> [[TMP11]], <16 x i8>* [[TMP12]], align 16
|
||||
// CHECK-BE-NEXT: ret void
|
||||
//
|
||||
// Codegen test body: disassembles the accumulator behind a __restrict-qualified
// pointer into a local array of four vectors. The expected-IR assertions above
// pin the exact instruction sequence.
void testRestrictQualifiedPointer2(__vector_quad *__restrict acc) {
|
||||
vector float arr[4];
|
||||
// arr is passed as an array, so the expected IR contains an array-to-pointer
// decay (ARRAYDECAY getelementptr) for it. The call to
// @llvm.ppc.mma.disassemble.acc is followed by four extractvalue/store pairs,
// one per element of arr.
__builtin_mma_disassemble_acc(arr, acc);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @testVolatileQualifiedPointer2(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[ACC_ADDR:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-NEXT: [[ARR:%.*]] = alloca [4 x <4 x float>], align 16
|
||||
// CHECK-NEXT: store volatile <512 x i1>* [[ACC:%.*]], <512 x i1>** [[ACC_ADDR]], align 8
|
||||
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[ARR]], i64 0, i64 0
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load volatile <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load volatile <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>, <512 x i1>* [[TMP1]], align 64
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]])
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float>* [[ARRAYDECAY]] to <16 x i8>*
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0
|
||||
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 0
|
||||
// CHECK-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* [[TMP6]], align 16
|
||||
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1
|
||||
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 1
|
||||
// CHECK-NEXT: store <16 x i8> [[TMP7]], <16 x i8>* [[TMP8]], align 16
|
||||
// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2
|
||||
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 2
|
||||
// CHECK-NEXT: store <16 x i8> [[TMP9]], <16 x i8>* [[TMP10]], align 16
|
||||
// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3
|
||||
// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 3
|
||||
// CHECK-NEXT: store <16 x i8> [[TMP11]], <16 x i8>* [[TMP12]], align 16
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// CHECK-BE-LABEL: @testVolatileQualifiedPointer2(
|
||||
// CHECK-BE-NEXT: entry:
|
||||
// CHECK-BE-NEXT: [[ACC_ADDR:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-BE-NEXT: [[ARR:%.*]] = alloca [4 x <4 x float>], align 16
|
||||
// CHECK-BE-NEXT: store volatile <512 x i1>* [[ACC:%.*]], <512 x i1>** [[ACC_ADDR]], align 8
|
||||
// CHECK-BE-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[ARR]], i64 0, i64 0
|
||||
// CHECK-BE-NEXT: [[TMP0:%.*]] = load volatile <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8
|
||||
// CHECK-BE-NEXT: [[TMP1:%.*]] = load volatile <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8
|
||||
// CHECK-BE-NEXT: [[TMP2:%.*]] = load <512 x i1>, <512 x i1>* [[TMP1]], align 64
|
||||
// CHECK-BE-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]])
|
||||
// CHECK-BE-NEXT: [[TMP4:%.*]] = bitcast <4 x float>* [[ARRAYDECAY]] to <16 x i8>*
|
||||
// CHECK-BE-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0
|
||||
// CHECK-BE-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 0
|
||||
// CHECK-BE-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* [[TMP6]], align 16
|
||||
// CHECK-BE-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1
|
||||
// CHECK-BE-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 1
|
||||
// CHECK-BE-NEXT: store <16 x i8> [[TMP7]], <16 x i8>* [[TMP8]], align 16
|
||||
// CHECK-BE-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2
|
||||
// CHECK-BE-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 2
|
||||
// CHECK-BE-NEXT: store <16 x i8> [[TMP9]], <16 x i8>* [[TMP10]], align 16
|
||||
// CHECK-BE-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3
|
||||
// CHECK-BE-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 3
|
||||
// CHECK-BE-NEXT: store <16 x i8> [[TMP11]], <16 x i8>* [[TMP12]], align 16
|
||||
// CHECK-BE-NEXT: ret void
|
||||
//
|
||||
// Codegen test body: same shape as the restrict-qualified variant, but the
// pointer parameter itself is __volatile. The expected-IR assertions above
// require the store and loads of the acc parameter slot to be volatile.
void testVolatileQualifiedPointer2(__vector_quad *__volatile acc) {
|
||||
vector float arr[4];
|
||||
// arr is passed as an array, so the expected IR contains an array-to-pointer
// decay (ARRAYDECAY getelementptr) for it. The call to
// @llvm.ppc.mma.disassemble.acc is followed by four extractvalue/store pairs,
// one per element of arr.
__builtin_mma_disassemble_acc(arr, acc);
|
||||
}
|
|
@ -1,22 +0,0 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||
// REQUIRES: powerpc-registered-target
|
||||
// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu \
|
||||
// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s
|
||||
// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu \
|
||||
// RUN: -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s
|
||||
|
||||
// The argument expression must not be emitted multiple times
|
||||
|
||||
// CHECK-LABEL: @test_fric(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[D:%.*]] = alloca double, align 8
|
||||
// CHECK-NEXT: [[TMP:%.*]] = alloca double, align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[D]], align 8
|
||||
// CHECK-NEXT: store double [[TMP0]], double* [[TMP]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[TMP]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.rint.f64(double [[TMP1]])
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// Codegen test body: passes a statement expression to __fric so that emitting
// the argument more than once would be visible in the IR. The expected-IR
// assertions above allow exactly one alloca for d, one load of d, and one call
// to @llvm.rint.f64.
void test_fric() {
|
||||
// The argument ({double d; d;}) declares a local inside the expression and
// yields its value; d is read without having been assigned.
__fric(({double d; d;}));
|
||||
}
|
|
@ -2213,6 +2213,8 @@ vector double xxsldwi_should_not_assert(vector double a, vector double b) {
|
|||
|
||||
void test_vector_cpsgn_float(vector float a, vector float b) {
|
||||
// CHECK-LABEL: test_vector_cpsgn_float
|
||||
// CHECK-DAG: load{{.*}}%__a
|
||||
// CHECK-DAG: load{{.*}}%__b
|
||||
// CHECK-NOT: SEPARATOR
|
||||
// CHECK-DAG: [[RA:%[0-9]+]] = load <4 x float>, <4 x float>* %__a.addr
|
||||
// CHECK-DAG: [[RB:%[0-9]+]] = load <4 x float>, <4 x float>* %__b.addr
|
||||
|
@ -2222,6 +2224,8 @@ void test_vector_cpsgn_float(vector float a, vector float b) {
|
|||
|
||||
void test_vector_cpsgn_double(vector double a, vector double b) {
|
||||
// CHECK-LABEL: test_vector_cpsgn_double
|
||||
// CHECK-DAG: load{{.*}}%__a
|
||||
// CHECK-DAG: load{{.*}}%__b
|
||||
// CHECK-NOT: SEPARATOR
|
||||
// CHECK-DAG: [[RA:%[0-9]+]] = load <2 x double>, <2 x double>* %__a.addr
|
||||
// CHECK-DAG: [[RB:%[0-9]+]] = load <2 x double>, <2 x double>* %__b.addr
|
||||
|
@ -2231,6 +2235,8 @@ void test_vector_cpsgn_double(vector double a, vector double b) {
|
|||
|
||||
void test_builtin_xvcpsgnsp(vector float a, vector float b) {
|
||||
// CHECK-LABEL: test_builtin_xvcpsgnsp
|
||||
// CHECK-DAG: load{{.*}}%a
|
||||
// CHECK-DAG: load{{.*}}%b
|
||||
// CHECK-NOT: SEPARATOR
|
||||
// CHECK-DAG: [[RA:%[0-9]+]] = load <4 x float>, <4 x float>* %a.addr
|
||||
// CHECK-DAG: [[RB:%[0-9]+]] = load <4 x float>, <4 x float>* %b.addr
|
||||
|
@ -2240,6 +2246,8 @@ void test_builtin_xvcpsgnsp(vector float a, vector float b) {
|
|||
|
||||
void test_builtin_xvcpsgndp(vector double a, vector double b) {
|
||||
// CHECK-LABEL: test_builtin_xvcpsgndp
|
||||
// CHECK-DAG: load{{.*}}%a
|
||||
// CHECK-DAG: load{{.*}}%b
|
||||
// CHECK-NOT: SEPARATOR
|
||||
// CHECK-DAG: [[RA:%[0-9]+]] = load <2 x double>, <2 x double>* %a.addr
|
||||
// CHECK-DAG: [[RB:%[0-9]+]] = load <2 x double>, <2 x double>* %b.addr
|
||||
|
|
|
@ -14,9 +14,9 @@
|
|||
// CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: store i32 [[B:%.*]], i32* [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: store i32 [[C:%.*]], i32* [[C_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[C_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg weak volatile i32* [[A_ADDR]], i32 [[TMP0]], i32 [[TMP1]] monotonic monotonic, align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[C_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg weak volatile i32* [[A_ADDR]], i32 [[TMP1]], i32 [[TMP0]] monotonic monotonic, align 4
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
|
||||
// CHECK-NEXT: store i32 [[TMP3]], i32* [[B_ADDR]], align 4
|
||||
|
@ -36,9 +36,9 @@ int test_builtin_ppc_compare_and_swap(int a, int b, int c) {
|
|||
// CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR]], align 8
|
||||
// CHECK-NEXT: store i64 [[B:%.*]], i64* [[B_ADDR]], align 8
|
||||
// CHECK-NEXT: store i64 [[C:%.*]], i64* [[C_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[B_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[C_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg weak volatile i64* [[A_ADDR]], i64 [[TMP0]], i64 [[TMP1]] monotonic monotonic, align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[C_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg weak volatile i64* [[A_ADDR]], i64 [[TMP1]], i64 [[TMP0]] monotonic monotonic, align 8
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
|
||||
// CHECK-NEXT: store i64 [[TMP3]], i64* [[B_ADDR]], align 8
|
||||
|
|
|
@ -12,7 +12,8 @@
|
|||
// CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: store i32 [[B:%.*]], i32* [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add i32* [[A_ADDR]], i32 [[TMP0]] monotonic, align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw add i32* [[A_ADDR]], i32 [[TMP1]] monotonic, align 4
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void test_builtin_ppc_fetch_and_add(int a, int b) {
|
||||
|
@ -26,7 +27,8 @@ void test_builtin_ppc_fetch_and_add(int a, int b) {
|
|||
// CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR]], align 8
|
||||
// CHECK-NEXT: store i64 [[B:%.*]], i64* [[B_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[B_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add i64* [[A_ADDR]], i64 [[TMP0]] monotonic, align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw add i64* [[A_ADDR]], i64 [[TMP1]] monotonic, align 8
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void test_builtin_ppc_fetch_and_addlp(long a, long b) {
|
||||
|
@ -39,7 +41,8 @@ void test_builtin_ppc_fetch_and_addlp(long a, long b) {
|
|||
// CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: store i32 [[B:%.*]], i32* [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and i32* [[A_ADDR]], i32 [[TMP0]] monotonic, align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and i32* [[A_ADDR]], i32 [[TMP1]] monotonic, align 4
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void test_builtin_ppc_fetch_and_and(unsigned int a, unsigned int b) {
|
||||
|
@ -52,7 +55,8 @@ void test_builtin_ppc_fetch_and_and(unsigned int a, unsigned int b) {
|
|||
// CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR]], align 8
|
||||
// CHECK-NEXT: store i64 [[B:%.*]], i64* [[B_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[B_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and i64* [[A_ADDR]], i64 [[TMP0]] monotonic, align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and i64* [[A_ADDR]], i64 [[TMP1]] monotonic, align 8
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void test_builtin_ppc_fetch_and_andlp(unsigned long a, unsigned long b) {
|
||||
|
@ -65,7 +69,8 @@ void test_builtin_ppc_fetch_and_andlp(unsigned long a, unsigned long b) {
|
|||
// CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: store i32 [[B:%.*]], i32* [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or i32* [[A_ADDR]], i32 [[TMP0]] monotonic, align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw or i32* [[A_ADDR]], i32 [[TMP1]] monotonic, align 4
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void test_builtin_ppc_fetch_and_or(unsigned int a, unsigned int b) {
|
||||
|
@ -78,7 +83,8 @@ void test_builtin_ppc_fetch_and_or(unsigned int a, unsigned int b) {
|
|||
// CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR]], align 8
|
||||
// CHECK-NEXT: store i64 [[B:%.*]], i64* [[B_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[B_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or i64* [[A_ADDR]], i64 [[TMP0]] monotonic, align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw or i64* [[A_ADDR]], i64 [[TMP1]] monotonic, align 8
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void test_builtin_ppc_fetch_and_orlp(unsigned long a, unsigned long b) {
|
||||
|
@ -91,7 +97,8 @@ void test_builtin_ppc_fetch_and_orlp(unsigned long a, unsigned long b) {
|
|||
// CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: store i32 [[B:%.*]], i32* [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg i32* [[A_ADDR]], i32 [[TMP0]] monotonic, align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw xchg i32* [[A_ADDR]], i32 [[TMP1]] monotonic, align 4
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void test_builtin_ppc_fetch_and_swap(unsigned int a, unsigned int b) {
|
||||
|
@ -104,7 +111,8 @@ void test_builtin_ppc_fetch_and_swap(unsigned int a, unsigned int b) {
|
|||
// CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR]], align 8
|
||||
// CHECK-NEXT: store i64 [[B:%.*]], i64* [[B_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[B_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg i64* [[A_ADDR]], i64 [[TMP0]] monotonic, align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw xchg i64* [[A_ADDR]], i64 [[TMP1]] monotonic, align 8
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void test_builtin_ppc_fetch_and_swaplp(unsigned long a, unsigned long b) {
|
||||
|
|
|
@ -15,8 +15,9 @@ extern float f;
|
|||
|
||||
// CHECK-LABEL: @test_fric(
|
||||
// CHECK: [[TMP0:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.rint.f64(double [[TMP0]])
|
||||
// CHECK-NEXT: ret double [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.rint.f64(double [[TMP1]])
|
||||
// CHECK-NEXT: ret double [[TMP2]]
|
||||
//
|
||||
double test_fric() {
|
||||
return __fric(a);
|
||||
|
@ -24,8 +25,9 @@ double test_fric() {
|
|||
|
||||
// CHECK-LABEL: @test_frim(
|
||||
// CHECK: [[TMP0:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.floor.f64(double [[TMP0]])
|
||||
// CHECK-NEXT: ret double [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.floor.f64(double [[TMP1]])
|
||||
// CHECK-NEXT: ret double [[TMP2]]
|
||||
//
|
||||
double test_frim() {
|
||||
return __frim(a);
|
||||
|
@ -33,8 +35,9 @@ double test_frim() {
|
|||
|
||||
// CHECK-LABEL: @test_frims(
|
||||
// CHECK: [[TMP0:%.*]] = load float, float* @d, align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.floor.f32(float [[TMP0]])
|
||||
// CHECK-NEXT: ret float [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.floor.f32(float [[TMP1]])
|
||||
// CHECK-NEXT: ret float [[TMP2]]
|
||||
//
|
||||
float test_frims() {
|
||||
return __frims(d);
|
||||
|
@ -42,8 +45,9 @@ float test_frims() {
|
|||
|
||||
// CHECK-LABEL: @test_frin(
|
||||
// CHECK: [[TMP0:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.round.f64(double [[TMP0]])
|
||||
// CHECK-NEXT: ret double [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.round.f64(double [[TMP1]])
|
||||
// CHECK-NEXT: ret double [[TMP2]]
|
||||
//
|
||||
double test_frin() {
|
||||
return __frin(a);
|
||||
|
@ -51,8 +55,9 @@ double test_frin() {
|
|||
|
||||
// CHECK-LABEL: @test_frins(
|
||||
// CHECK: [[TMP0:%.*]] = load float, float* @d, align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.round.f32(float [[TMP0]])
|
||||
// CHECK-NEXT: ret float [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.round.f32(float [[TMP1]])
|
||||
// CHECK-NEXT: ret float [[TMP2]]
|
||||
//
|
||||
float test_frins() {
|
||||
return __frins(d);
|
||||
|
@ -60,8 +65,9 @@ float test_frins() {
|
|||
|
||||
// CHECK-LABEL: @test_frip(
|
||||
// CHECK: [[TMP0:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.ceil.f64(double [[TMP0]])
|
||||
// CHECK-NEXT: ret double [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.ceil.f64(double [[TMP1]])
|
||||
// CHECK-NEXT: ret double [[TMP2]]
|
||||
//
|
||||
double test_frip() {
|
||||
return __frip(a);
|
||||
|
@ -69,8 +75,9 @@ double test_frip() {
|
|||
|
||||
// CHECK-LABEL: @test_frips(
|
||||
// CHECK: [[TMP0:%.*]] = load float, float* @d, align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.ceil.f32(float [[TMP0]])
|
||||
// CHECK-NEXT: ret float [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.ceil.f32(float [[TMP1]])
|
||||
// CHECK-NEXT: ret float [[TMP2]]
|
||||
//
|
||||
float test_frips() {
|
||||
return __frips(d);
|
||||
|
@ -78,8 +85,9 @@ float test_frips() {
|
|||
|
||||
// CHECK-LABEL: @test_friz(
|
||||
// CHECK: [[TMP0:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.trunc.f64(double [[TMP0]])
|
||||
// CHECK-NEXT: ret double [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.trunc.f64(double [[TMP1]])
|
||||
// CHECK-NEXT: ret double [[TMP2]]
|
||||
//
|
||||
double test_friz() {
|
||||
return __friz(a);
|
||||
|
@ -87,8 +95,9 @@ double test_friz() {
|
|||
|
||||
// CHECK-LABEL: @test_frizs(
|
||||
// CHECK: [[TMP0:%.*]] = load float, float* @d, align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.trunc.f32(float [[TMP0]])
|
||||
// CHECK-NEXT: ret float [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.trunc.f32(float [[TMP1]])
|
||||
// CHECK-NEXT: ret float [[TMP2]]
|
||||
//
|
||||
float test_frizs() {
|
||||
return __frizs(d);
|
||||
|
@ -136,8 +145,9 @@ float test_frsqrtes() {
|
|||
|
||||
// CHECK-LABEL: @test_fsqrt(
|
||||
// CHECK: [[TMP0:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.sqrt.f64(double [[TMP0]])
|
||||
// CHECK-NEXT: ret double [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.sqrt.f64(double [[TMP1]])
|
||||
// CHECK-NEXT: ret double [[TMP2]]
|
||||
//
|
||||
double test_fsqrt() {
|
||||
return __fsqrt(a);
|
||||
|
@ -145,8 +155,9 @@ double test_fsqrt() {
|
|||
|
||||
// CHECK-LABEL: @test_fsqrts(
|
||||
// CHECK: [[TMP0:%.*]] = load float, float* @d, align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.sqrt.f32(float [[TMP0]])
|
||||
// CHECK-NEXT: ret float [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.sqrt.f32(float [[TMP1]])
|
||||
// CHECK-NEXT: ret float [[TMP2]]
|
||||
//
|
||||
float test_fsqrts() {
|
||||
return __fsqrts(d);
|
||||
|
@ -154,8 +165,9 @@ float test_fsqrts() {
|
|||
|
||||
// CHECK-LABEL: @test_builtin_ppc_fric(
|
||||
// CHECK: [[TMP0:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.rint.f64(double [[TMP0]])
|
||||
// CHECK-NEXT: ret double [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.rint.f64(double [[TMP1]])
|
||||
// CHECK-NEXT: ret double [[TMP2]]
|
||||
//
|
||||
double test_builtin_ppc_fric() {
|
||||
return __builtin_ppc_fric(a);
|
||||
|
@ -163,8 +175,9 @@ double test_builtin_ppc_fric() {
|
|||
|
||||
// CHECK-LABEL: @test_builtin_ppc_frim(
|
||||
// CHECK: [[TMP0:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.floor.f64(double [[TMP0]])
|
||||
// CHECK-NEXT: ret double [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.floor.f64(double [[TMP1]])
|
||||
// CHECK-NEXT: ret double [[TMP2]]
|
||||
//
|
||||
double test_builtin_ppc_frim() {
|
||||
return __builtin_ppc_frim(a);
|
||||
|
@ -172,8 +185,9 @@ double test_builtin_ppc_frim() {
|
|||
|
||||
// CHECK-LABEL: @test_builtin_ppc_frims(
|
||||
// CHECK: [[TMP0:%.*]] = load float, float* @d, align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.floor.f32(float [[TMP0]])
|
||||
// CHECK-NEXT: ret float [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.floor.f32(float [[TMP1]])
|
||||
// CHECK-NEXT: ret float [[TMP2]]
|
||||
//
|
||||
float test_builtin_ppc_frims() {
|
||||
return __builtin_ppc_frims(d);
|
||||
|
@ -181,8 +195,9 @@ float test_builtin_ppc_frims() {
|
|||
|
||||
// CHECK-LABEL: @test_builtin_ppc_frin(
|
||||
// CHECK: [[TMP0:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.round.f64(double [[TMP0]])
|
||||
// CHECK-NEXT: ret double [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.round.f64(double [[TMP1]])
|
||||
// CHECK-NEXT: ret double [[TMP2]]
|
||||
//
|
||||
double test_builtin_ppc_frin() {
|
||||
return __builtin_ppc_frin(a);
|
||||
|
@ -190,8 +205,9 @@ double test_builtin_ppc_frin() {
|
|||
|
||||
// CHECK-LABEL: @test_builtin_ppc_frins(
|
||||
// CHECK: [[TMP0:%.*]] = load float, float* @d, align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.round.f32(float [[TMP0]])
|
||||
// CHECK-NEXT: ret float [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.round.f32(float [[TMP1]])
|
||||
// CHECK-NEXT: ret float [[TMP2]]
|
||||
//
|
||||
float test_builtin_ppc_frins() {
|
||||
return __builtin_ppc_frins(d);
|
||||
|
@ -199,8 +215,9 @@ float test_builtin_ppc_frins() {
|
|||
|
||||
// CHECK-LABEL: @test_builtin_ppc_frip(
|
||||
// CHECK: [[TMP0:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.ceil.f64(double [[TMP0]])
|
||||
// CHECK-NEXT: ret double [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.ceil.f64(double [[TMP1]])
|
||||
// CHECK-NEXT: ret double [[TMP2]]
|
||||
//
|
||||
double test_builtin_ppc_frip() {
|
||||
return __builtin_ppc_frip(a);
|
||||
|
@ -208,8 +225,9 @@ double test_builtin_ppc_frip() {
|
|||
|
||||
// CHECK-LABEL: @test_builtin_ppc_frips(
|
||||
// CHECK: [[TMP0:%.*]] = load float, float* @d, align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.ceil.f32(float [[TMP0]])
|
||||
// CHECK-NEXT: ret float [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.ceil.f32(float [[TMP1]])
|
||||
// CHECK-NEXT: ret float [[TMP2]]
|
||||
//
|
||||
float test_builtin_ppc_frips() {
|
||||
return __builtin_ppc_frips(d);
|
||||
|
@ -217,8 +235,9 @@ float test_builtin_ppc_frips() {
|
|||
|
||||
// CHECK-LABEL: @test_builtin_ppc_friz(
|
||||
// CHECK: [[TMP0:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.trunc.f64(double [[TMP0]])
|
||||
// CHECK-NEXT: ret double [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.trunc.f64(double [[TMP1]])
|
||||
// CHECK-NEXT: ret double [[TMP2]]
|
||||
//
|
||||
double test_builtin_ppc_friz() {
|
||||
return __builtin_ppc_friz(a);
|
||||
|
@ -226,8 +245,9 @@ double test_builtin_ppc_friz() {
|
|||
|
||||
// CHECK-LABEL: @test_builtin_ppc_frizs(
|
||||
// CHECK: [[TMP0:%.*]] = load float, float* @d, align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.trunc.f32(float [[TMP0]])
|
||||
// CHECK-NEXT: ret float [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.trunc.f32(float [[TMP1]])
|
||||
// CHECK-NEXT: ret float [[TMP2]]
|
||||
//
|
||||
float test_builtin_ppc_frizs() {
|
||||
return __builtin_ppc_frizs(d);
|
||||
|
@ -275,8 +295,9 @@ float test_builtin_ppc_frsqrtes() {
|
|||
|
||||
// CHECK-LABEL: @test_builtin_ppc_fsqrt(
|
||||
// CHECK: [[TMP0:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.sqrt.f64(double [[TMP0]])
|
||||
// CHECK-NEXT: ret double [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @a, align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.sqrt.f64(double [[TMP1]])
|
||||
// CHECK-NEXT: ret double [[TMP2]]
|
||||
//
|
||||
double test_builtin_ppc_fsqrt() {
|
||||
return __builtin_ppc_fsqrt(a);
|
||||
|
@ -284,8 +305,9 @@ double test_builtin_ppc_fsqrt() {
|
|||
|
||||
// CHECK-LABEL: @test_builtin_ppc_fsqrts(
|
||||
// CHECK: [[TMP0:%.*]] = load float, float* @d, align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.sqrt.f32(float [[TMP0]])
|
||||
// CHECK-NEXT: ret float [[TMP1]]
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.sqrt.f32(float [[TMP1]])
|
||||
// CHECK-NEXT: ret float [[TMP2]]
|
||||
//
|
||||
float test_builtin_ppc_fsqrts() {
|
||||
return __builtin_ppc_fsqrts(d);
|
||||
|
|
|
@ -95,6 +95,7 @@ float fnmadds (float f) {
|
|||
// CHECK-LABEL: @fnmsub(
|
||||
// CHECK: [[D_ADDR:%.*]] = alloca double, align 8
|
||||
// CHECK-NEXT: store double [[D:%.*]], double* [[D_ADDR]], align 8
|
||||
// CHECK-COUNT-3: load double, double* [[D_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[D_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[D_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load double, double* [[D_ADDR]], align 8
|
||||
|
@ -108,6 +109,7 @@ double fnmsub (double d) {
|
|||
// CHECK-LABEL: @fnmsubs(
|
||||
// CHECK: [[F_ADDR:%.*]] = alloca float, align 4
|
||||
// CHECK-NEXT: store float [[F:%.*]], float* [[F_ADDR]], align 4
|
||||
// CHECK-COUNT-3: load float, float* [[F_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[F_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[F_ADDR]], align 4
|
||||
|
|
|
@ -14,11 +14,13 @@ extern void *c;
|
|||
|
||||
// CHECK-LABEL: @test_popcntb(
|
||||
// CHECK: [[TMP0:%.*]] = load i64, i64* @a, align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* @a, align 8
|
||||
// CHECK-NEXT: [[POPCNTB:%.*]] = call i64 @llvm.ppc.popcntb.i64.i64(i64 [[TMP0]])
|
||||
// CHECK-NEXT: ret i64 [[POPCNTB]]
|
||||
//
|
||||
// CHECK-32-LABEL: @test_popcntb(
|
||||
// CHECK-32: [[TMP0:%.*]] = load i32, i32* @a, align 4
|
||||
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* @a, align 4
|
||||
// CHECK-32-NEXT: [[POPCNTB:%.*]] = call i32 @llvm.ppc.popcntb.i32.i32(i32 [[TMP0]])
|
||||
// CHECK-32-NEXT: ret i32 [[POPCNTB]]
|
||||
//
|
||||
|
@ -196,11 +198,13 @@ void test_dcbz() {
|
|||
|
||||
// CHECK-LABEL: @test_builtin_ppc_popcntb(
|
||||
// CHECK: [[TMP0:%.*]] = load i64, i64* @a, align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* @a, align 8
|
||||
// CHECK-NEXT: [[POPCNTB:%.*]] = call i64 @llvm.ppc.popcntb.i64.i64(i64 [[TMP0]])
|
||||
// CHECK-NEXT: ret i64 [[POPCNTB]]
|
||||
//
|
||||
// CHECK-32-LABEL: @test_builtin_ppc_popcntb(
|
||||
// CHECK-32: [[TMP0:%.*]] = load i32, i32* @a, align 4
|
||||
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* @a, align 4
|
||||
// CHECK-32-NEXT: [[POPCNTB:%.*]] = call i32 @llvm.ppc.popcntb.i32.i32(i32 [[TMP0]])
|
||||
// CHECK-32-NEXT: ret i32 [[POPCNTB]]
|
||||
//
|
||||
|
|
|
@ -1,23 +1,17 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||
// RUN: %clang_cc1 -no-opaque-pointers -triple powerpc64le-linux-unknown -target-cpu pwr10 \
|
||||
// RUN: -emit-llvm -o - %s | FileCheck %s
|
||||
// RUN: -emit-llvm -O3 -o - %s | FileCheck %s
|
||||
// RUN: %clang_cc1 -no-opaque-pointers -triple powerpc64le-linux-unknown -target-cpu pwr9 \
|
||||
// RUN: -emit-llvm -o - %s | FileCheck %s
|
||||
// RUN: -emit-llvm -O3 -o - %s | FileCheck %s
|
||||
// RUN: %clang_cc1 -no-opaque-pointers -triple powerpc64le-linux-unknown -target-cpu pwr8 \
|
||||
// RUN: -emit-llvm -o - %s | FileCheck %s
|
||||
// RUN: -emit-llvm -O3 -o - %s | FileCheck %s
|
||||
|
||||
// CHECK-LABEL: @test1(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[PTR1_ADDR:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-NEXT: store <512 x i1>* [[PTR1:%.*]], <512 x i1>** [[PTR1_ADDR]], align 8
|
||||
// CHECK-NEXT: store <512 x i1>* [[PTR2:%.*]], <512 x i1>** [[PTR2_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>*, <512 x i1>** [[PTR1_ADDR]], align 8
|
||||
// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[TMP0]], i64 2
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[ADD_PTR]], align 64
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[PTR2_ADDR]], align 8
|
||||
// CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[TMP2]], i64 1
|
||||
// CHECK-NEXT: store <512 x i1> [[TMP1]], <512 x i1>* [[ADD_PTR1]], align 64
|
||||
// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[PTR1:%.*]], i64 2
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, <512 x i1>* [[ADD_PTR]], align 64, [[TBAA2:!tbaa !.*]]
|
||||
// CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[PTR2:%.*]], i64 1
|
||||
// CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[ADD_PTR1]], align 64, [[TBAA2]]
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void test1(__vector_quad *ptr1, __vector_quad *ptr2) {
|
||||
|
@ -26,422 +20,12 @@ void test1(__vector_quad *ptr1, __vector_quad *ptr2) {
|
|||
|
||||
// CHECK-LABEL: @test2(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[PTR1_ADDR:%.*]] = alloca <256 x i1>*, align 8
|
||||
// CHECK-NEXT: [[PTR2_ADDR:%.*]] = alloca <256 x i1>*, align 8
|
||||
// CHECK-NEXT: store <256 x i1>* [[PTR1:%.*]], <256 x i1>** [[PTR1_ADDR]], align 8
|
||||
// CHECK-NEXT: store <256 x i1>* [[PTR2:%.*]], <256 x i1>** [[PTR2_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>*, <256 x i1>** [[PTR1_ADDR]], align 8
|
||||
// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[TMP0]], i64 2
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[ADD_PTR]], align 32
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[PTR2_ADDR]], align 8
|
||||
// CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[TMP2]], i64 1
|
||||
// CHECK-NEXT: store <256 x i1> [[TMP1]], <256 x i1>* [[ADD_PTR1]], align 32
|
||||
// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[PTR1:%.*]], i64 2
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, <256 x i1>* [[ADD_PTR]], align 32, [[TBAA6:!tbaa !.*]]
|
||||
// CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[PTR2:%.*]], i64 1
|
||||
// CHECK-NEXT: store <256 x i1> [[TMP0]], <256 x i1>* [[ADD_PTR1]], align 32, [[TBAA6]]
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void test2(__vector_pair *ptr1, __vector_pair *ptr2) {
|
||||
*(ptr2 + 1) = *(ptr1 + 2);
|
||||
}
|
||||
|
||||
typedef __vector_quad vq_t;
|
||||
// CHECK-LABEL: @testVQTypedef(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[INP_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[OUTP_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[VQIN:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-NEXT: [[VQOUT:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-NEXT: store i32* [[INP:%.*]], i32** [[INP_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32* [[OUTP:%.*]], i32** [[OUTP_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[INP_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
|
||||
// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQIN]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[OUTP_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <512 x i1>*
|
||||
// CHECK-NEXT: store <512 x i1>* [[TMP3]], <512 x i1>** [[VQOUT]], align 8
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load <512 x i1>*, <512 x i1>** [[VQIN]], align 8
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = load <512 x i1>, <512 x i1>* [[TMP4]], align 64
|
||||
// CHECK-NEXT: [[TMP6:%.*]] = load <512 x i1>*, <512 x i1>** [[VQOUT]], align 8
|
||||
// CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void testVQTypedef(int *inp, int *outp) {
|
||||
vq_t *vqin = (vq_t *)inp;
|
||||
vq_t *vqout = (vq_t *)outp;
|
||||
*vqout = *vqin;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @testVQArg3(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VQ_ADDR:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-NEXT: store <512 x i1>* [[VQ:%.*]], <512 x i1>** [[VQ_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
|
||||
// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQ_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[TMP2]], align 64
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-NEXT: store <512 x i1> [[TMP3]], <512 x i1>* [[TMP4]], align 64
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void testVQArg3(__vector_quad *vq, int *ptr) {
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
*vqp = *vq;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @testVQArg4(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VQ_ADDR:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-NEXT: store <512 x i1>* [[VQ:%.*]], <512 x i1>** [[VQ_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
|
||||
// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQ_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[TMP2]], align 64
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-NEXT: store <512 x i1> [[TMP3]], <512 x i1>* [[TMP4]], align 64
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void testVQArg4(const __vector_quad *const vq, int *ptr) {
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
*vqp = *vq;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @testVQArg5(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VQA_ADDR:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-NEXT: store <512 x i1>* [[VQA:%.*]], <512 x i1>** [[VQA_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
|
||||
// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQA_ADDR]], align 8
|
||||
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[TMP2]], i64 0
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[ARRAYIDX]], align 64
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-NEXT: store <512 x i1> [[TMP3]], <512 x i1>* [[TMP4]], align 64
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void testVQArg5(__vector_quad vqa[], int *ptr) {
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
*vqp = vqa[0];
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @testVQArg7(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VQ_ADDR:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-NEXT: store <512 x i1>* [[VQ:%.*]], <512 x i1>** [[VQ_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
|
||||
// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQ_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[TMP2]], align 64
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-NEXT: store <512 x i1> [[TMP3]], <512 x i1>* [[TMP4]], align 64
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void testVQArg7(const vq_t *vq, int *ptr) {
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
*vqp = *vq;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @testVQRet2(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
|
||||
// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[TMP2]], i64 2
|
||||
// CHECK-NEXT: ret <512 x i1>* [[ADD_PTR]]
|
||||
//
|
||||
__vector_quad *testVQRet2(int *ptr) {
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
return vqp + 2;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @testVQRet3(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
|
||||
// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[TMP2]], i64 2
|
||||
// CHECK-NEXT: ret <512 x i1>* [[ADD_PTR]]
|
||||
//
|
||||
const __vector_quad *testVQRet3(int *ptr) {
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
return vqp + 2;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @testVQRet5(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
|
||||
// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[TMP2]], i64 2
|
||||
// CHECK-NEXT: ret <512 x i1>* [[ADD_PTR]]
|
||||
//
|
||||
const vq_t *testVQRet5(int *ptr) {
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
return vqp + 2;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @testVQSizeofAlignof(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8
|
||||
// CHECK-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64
|
||||
// CHECK-NEXT: [[SIZET:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[ALIGNT:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[SIZEV:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[ALIGNV:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
|
||||
// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[TMP2]], align 64
|
||||
// CHECK-NEXT: store <512 x i1> [[TMP3]], <512 x i1>* [[VQ]], align 64
|
||||
// CHECK-NEXT: store i32 64, i32* [[SIZET]], align 4
|
||||
// CHECK-NEXT: store i32 64, i32* [[ALIGNT]], align 4
|
||||
// CHECK-NEXT: store i32 64, i32* [[SIZEV]], align 4
|
||||
// CHECK-NEXT: store i32 64, i32* [[ALIGNV]], align 4
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[SIZET]], align 4
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ALIGNT]], align 4
|
||||
// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP4]], [[TMP5]]
|
||||
// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[SIZEV]], align 4
|
||||
// CHECK-NEXT: [[ADD1:%.*]] = add i32 [[ADD]], [[TMP6]]
|
||||
// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ALIGNV]], align 4
|
||||
// CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP7]]
|
||||
// CHECK-NEXT: ret i32 [[ADD2]]
|
||||
//
|
||||
int testVQSizeofAlignof(int *ptr) {
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
__vector_quad vq = *vqp;
|
||||
unsigned sizet = sizeof(__vector_quad);
|
||||
unsigned alignt = __alignof__(__vector_quad);
|
||||
unsigned sizev = sizeof(vq);
|
||||
unsigned alignv = __alignof__(vq);
|
||||
return sizet + alignt + sizev + alignv;
|
||||
}
|
||||
|
||||
typedef __vector_pair vp_t;
|
||||
// CHECK-LABEL: @testVPTypedef(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[INP_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[OUTP_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[VPIN:%.*]] = alloca <256 x i1>*, align 8
|
||||
// CHECK-NEXT: [[VPOUT:%.*]] = alloca <256 x i1>*, align 8
|
||||
// CHECK-NEXT: store i32* [[INP:%.*]], i32** [[INP_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32* [[OUTP:%.*]], i32** [[OUTP_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[INP_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
|
||||
// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPIN]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[OUTP_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <256 x i1>*
|
||||
// CHECK-NEXT: store <256 x i1>* [[TMP3]], <256 x i1>** [[VPOUT]], align 8
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load <256 x i1>*, <256 x i1>** [[VPIN]], align 8
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = load <256 x i1>, <256 x i1>* [[TMP4]], align 32
|
||||
// CHECK-NEXT: [[TMP6:%.*]] = load <256 x i1>*, <256 x i1>** [[VPOUT]], align 8
|
||||
// CHECK-NEXT: store <256 x i1> [[TMP5]], <256 x i1>* [[TMP6]], align 32
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// Copies one __vector_pair from 'inp' to 'outp', with both pointers spelled
// through the vp_t typedef. The IR patterns listed before this function show
// the copy as a <256 x i1> load followed by a store, both with align 32.
void testVPTypedef(int *inp, int *outp) {
|
||||
vp_t *vpin = (vp_t *)inp;
|
||||
vp_t *vpout = (vp_t *)outp;
|
||||
*vpout = *vpin;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @testVPArg3(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VP_ADDR:%.*]] = alloca <256 x i1>*, align 8
|
||||
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8
|
||||
// CHECK-NEXT: store <256 x i1>* [[VP:%.*]], <256 x i1>** [[VP_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
|
||||
// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VP_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-NEXT: store <256 x i1> [[TMP3]], <256 x i1>* [[TMP4]], align 32
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// Takes a pointer to __vector_pair as a parameter and copies the pointed-to
// pair into the memory at 'ptr' (reinterpreted as a pair pointer).
void testVPArg3(__vector_pair *vp, int *ptr) {
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
*vpp = *vp;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @testVPArg4(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VP_ADDR:%.*]] = alloca <256 x i1>*, align 8
|
||||
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8
|
||||
// CHECK-NEXT: store <256 x i1>* [[VP:%.*]], <256 x i1>** [[VP_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
|
||||
// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VP_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-NEXT: store <256 x i1> [[TMP3]], <256 x i1>* [[TMP4]], align 32
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// Copies the pair pointed to by 'vp' (a const pointer to a const
// __vector_pair) into the memory at 'ptr' (reinterpreted as a pair pointer).
void testVPArg4(const __vector_pair *const vp, int *ptr) {
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
*vpp = *vp;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @testVPArg5(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VPA_ADDR:%.*]] = alloca <256 x i1>*, align 8
|
||||
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8
|
||||
// CHECK-NEXT: store <256 x i1>* [[VPA:%.*]], <256 x i1>** [[VPA_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
|
||||
// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPA_ADDR]], align 8
|
||||
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[TMP2]], i64 0
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[ARRAYIDX]], align 32
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-NEXT: store <256 x i1> [[TMP3]], <256 x i1>* [[TMP4]], align 32
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// Takes an array-of-__vector_pair parameter and copies its first element into
// the memory at 'ptr'. In the IR patterns listed before this function the
// parameter appears as a plain <256 x i1>* pointer.
void testVPArg5(__vector_pair vpa[], int *ptr) {
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
*vpp = vpa[0];
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @testVPArg7(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[VP_ADDR:%.*]] = alloca <256 x i1>*, align 8
|
||||
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8
|
||||
// CHECK-NEXT: store <256 x i1>* [[VP:%.*]], <256 x i1>** [[VP_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
|
||||
// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VP_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-NEXT: store <256 x i1> [[TMP3]], <256 x i1>* [[TMP4]], align 32
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
// Copies the pair pointed to by 'vp' (pointer to const vp_t, the typedef of
// __vector_pair) into the memory at 'ptr' (reinterpreted as a pair pointer).
void testVPArg7(const vp_t *vp, int *ptr) {
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
*vpp = *vp;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @testVPRet2(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8
|
||||
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
|
||||
// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[TMP2]], i64 2
|
||||
// CHECK-NEXT: ret <256 x i1>* [[ADD_PTR]]
|
||||
//
|
||||
// Returns a __vector_pair pointer: 'ptr' reinterpreted as a pair pointer and
// advanced by two elements (a getelementptr with index 2 in the IR patterns
// listed before this function).
__vector_pair *testVPRet2(int *ptr) {
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
return vpp + 2;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @testVPRet3(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8
|
||||
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
|
||||
// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[TMP2]], i64 2
|
||||
// CHECK-NEXT: ret <256 x i1>* [[ADD_PTR]]
|
||||
//
|
||||
// Returns a pointer to const __vector_pair: 'ptr' reinterpreted as a pair
// pointer and advanced by two elements (a getelementptr with index 2 in the
// IR patterns listed before this function).
const __vector_pair *testVPRet3(int *ptr) {
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
return vpp + 2;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @testVPRet5(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8
|
||||
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
|
||||
// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[TMP2]], i64 2
|
||||
// CHECK-NEXT: ret <256 x i1>* [[ADD_PTR]]
|
||||
//
|
||||
// Returns a pointer to const vp_t (the typedef of __vector_pair): 'ptr'
// reinterpreted as a pair pointer and advanced by two elements (a
// getelementptr with index 2 in the IR patterns listed before this function).
const vp_t *testVPRet5(int *ptr) {
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
return vpp + 2;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @testVPSizeofAlignof(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
|
||||
// CHECK-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8
|
||||
// CHECK-NEXT: [[VP:%.*]] = alloca <256 x i1>, align 32
|
||||
// CHECK-NEXT: [[SIZET:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[ALIGNT:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[SIZEV:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[ALIGNV:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
|
||||
// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32
|
||||
// CHECK-NEXT: store <256 x i1> [[TMP3]], <256 x i1>* [[VP]], align 32
|
||||
// CHECK-NEXT: store i32 32, i32* [[SIZET]], align 4
|
||||
// CHECK-NEXT: store i32 32, i32* [[ALIGNT]], align 4
|
||||
// CHECK-NEXT: store i32 32, i32* [[SIZEV]], align 4
|
||||
// CHECK-NEXT: store i32 32, i32* [[ALIGNV]], align 4
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[SIZET]], align 4
|
||||
// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ALIGNT]], align 4
|
||||
// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP4]], [[TMP5]]
|
||||
// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[SIZEV]], align 4
|
||||
// CHECK-NEXT: [[ADD1:%.*]] = add i32 [[ADD]], [[TMP6]]
|
||||
// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ALIGNV]], align 4
|
||||
// CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP7]]
|
||||
// CHECK-NEXT: ret i32 [[ADD2]]
|
||||
//
|
||||
// Reads a __vector_pair through a pointer cast from 'ptr' and returns the sum
// of sizeof/__alignof__ taken on both the type and the local variable.
// The IR patterns listed before this function store the constant 32 for each
// of the four values.
int testVPSizeofAlignof(int *ptr) {
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
__vector_pair vp = *vpp;
|
||||
// Size and alignment queried on the type itself.
unsigned sizet = sizeof(__vector_pair);
|
||||
unsigned alignt = __alignof__(__vector_pair);
|
||||
// Size and alignment queried on the local variable.
unsigned sizev = sizeof(vp);
|
||||
unsigned alignv = __alignof__(vp);
|
||||
return sizet + alignt + sizev + alignv;
|
||||
}
|
||||
|
|
|
@ -12,6 +12,11 @@
|
|||
|
||||
// typedef
|
||||
typedef __vector_quad vq_t;
|
||||
// Copies one __vector_quad from 'inp' to 'outp', with both pointers spelled
// through the vq_t typedef. No diagnostic annotation is attached, so this use
// is presumably meant to be accepted.
void testVQTypedef(int *inp, int *outp) {
|
||||
vq_t *vqin = (vq_t *)inp;
|
||||
vq_t *vqout = (vq_t *)outp;
|
||||
*vqout = *vqin;
|
||||
}
|
||||
|
||||
// function argument
|
||||
void testVQArg1(__vector_quad vq, int *ptr) { // expected-error {{invalid use of PPC MMA type}}
|
||||
|
@ -24,22 +29,57 @@ void testVQArg2(const __vector_quad vq, int *ptr) { // expected-error {{invalid
|
|||
*vqp = vq;
|
||||
}
|
||||
|
||||
// Pointer-to-__vector_quad parameter: copies the pointed-to accumulator into
// the memory at 'ptr'. No diagnostic annotation is attached.
void testVQArg3(__vector_quad *vq, int *ptr) {
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
*vqp = *vq;
|
||||
}
|
||||
|
||||
// Const pointer to const __vector_quad parameter: copies the pointed-to
// accumulator into the memory at 'ptr'. No diagnostic annotation is attached.
void testVQArg4(const __vector_quad *const vq, int *ptr) {
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
*vqp = *vq;
|
||||
}
|
||||
|
||||
// Array-of-__vector_quad parameter: copies its first element into the memory
// at 'ptr'. No diagnostic annotation is attached.
void testVQArg5(__vector_quad vqa[], int *ptr) {
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
*vqp = vqa[0];
|
||||
}
|
||||
|
||||
// By-value parameter of type const vq_t (the typedef of __vector_quad); the
// signature line is annotated with the 'invalid use of PPC MMA type' error.
void testVQArg6(const vq_t vq, int *ptr) { // expected-error {{invalid use of PPC MMA type}}
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
*vqp = vq;
|
||||
}
|
||||
|
||||
// Pointer to const vq_t (the typedef of __vector_quad) parameter: copies the
// pointed-to accumulator into the memory at 'ptr'. No diagnostic annotation
// is attached.
void testVQArg7(const vq_t *vq, int *ptr) {
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
*vqp = *vq;
|
||||
}
|
||||
|
||||
// function return
|
||||
// Returns a __vector_quad by value; both the signature line and the return
// statement are annotated with the 'invalid use of PPC MMA type' error.
__vector_quad testVQRet1(int *ptr) { // expected-error {{invalid use of PPC MMA type}}
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
return *vqp; // expected-error {{invalid use of PPC MMA type}}
|
||||
}
|
||||
|
||||
// Returns a __vector_quad pointer ('ptr' reinterpreted and advanced by two
// elements). No diagnostic annotation is attached.
__vector_quad *testVQRet2(int *ptr) {
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
return vqp + 2;
|
||||
}
|
||||
|
||||
// Returns a pointer to const __vector_quad ('ptr' reinterpreted and advanced
// by two elements). No diagnostic annotation is attached.
const __vector_quad *testVQRet3(int *ptr) {
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
return vqp + 2;
|
||||
}
|
||||
|
||||
// Returns a const vq_t (the typedef of __vector_quad) by value; both the
// signature line and the return statement are annotated with the
// 'invalid use of PPC MMA type' error.
const vq_t testVQRet4(int *ptr) { // expected-error {{invalid use of PPC MMA type}}
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
return *vqp; // expected-error {{invalid use of PPC MMA type}}
|
||||
}
|
||||
|
||||
// Returns a pointer to const vq_t ('ptr' reinterpreted as a __vector_quad
// pointer and advanced by two elements). No diagnostic annotation is attached.
const vq_t *testVQRet5(int *ptr) {
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
return vqp + 2;
|
||||
}
|
||||
|
||||
// global
|
||||
__vector_quad globalvq; // expected-error {{invalid use of PPC MMA type}}
|
||||
const __vector_quad globalvq2; // expected-error {{invalid use of PPC MMA type}}
|
||||
|
@ -47,6 +87,16 @@ __vector_quad *globalvqp;
|
|||
const __vector_quad *const globalvqp2;
|
||||
vq_t globalvq_t; // expected-error {{invalid use of PPC MMA type}}
|
||||
|
||||
// local
|
||||
// Exercises __vector_quad local variables in three forms: initialized from a
// dereferenced pointer, passed by address to __builtin_mma_xxsetaccz, and
// passed by address to __builtin_mma_xvi4ger8 before being stored back through
// 'vqp'. No diagnostic annotation is attached to any line.
void testVQLocal(int *ptr, vector unsigned char vc) {
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
// Local initialized by copying from memory.
__vector_quad vq1 = *vqp;
|
||||
// Local declared without an initializer, then handed to a builtin by address.
__vector_quad vq2;
|
||||
__builtin_mma_xxsetaccz(&vq2);
|
||||
__vector_quad vq3;
|
||||
__builtin_mma_xvi4ger8(&vq3, vc, vc);
|
||||
*vqp = vq3;
|
||||
}
|
||||
|
||||
// struct field
|
||||
struct TestVQStruct {
|
||||
|
@ -56,6 +106,17 @@ struct TestVQStruct {
|
|||
__vector_quad *vq;
|
||||
};
|
||||
|
||||
// sizeof / alignof
|
||||
// Applies sizeof and __alignof__ to __vector_quad, both as a type and as a
// local variable, and returns the sum of the four results. No diagnostic
// annotation is attached.
int testVQSizeofAlignof(int *ptr) {
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
__vector_quad vq = *vqp;
|
||||
// Queries on the type itself.
unsigned sizet = sizeof(__vector_quad);
|
||||
unsigned alignt = __alignof__(__vector_quad);
|
||||
// Queries on the local variable.
unsigned sizev = sizeof(vq);
|
||||
unsigned alignv = __alignof__(vq);
|
||||
return sizet + alignt + sizev + alignv;
|
||||
}
|
||||
|
||||
// operators
|
||||
int testVQOperators1(int *ptr) {
|
||||
__vector_quad *vqp = (__vector_quad *)ptr;
|
||||
|
@ -107,6 +168,11 @@ void testVQOperators4(int v, void *ptr) {
|
|||
|
||||
// typedef
|
||||
typedef __vector_pair vp_t;
|
||||
// Copies one __vector_pair from 'inp' to 'outp', with both pointers spelled
// through the vp_t typedef. No diagnostic annotation is attached, so this use
// is presumably meant to be accepted.
void testVPTypedef(int *inp, int *outp) {
|
||||
vp_t *vpin = (vp_t *)inp;
|
||||
vp_t *vpout = (vp_t *)outp;
|
||||
*vpout = *vpin;
|
||||
}
|
||||
|
||||
// function argument
|
||||
void testVPArg1(__vector_pair vp, int *ptr) { // expected-error {{invalid use of PPC MMA type}}
|
||||
|
@ -119,22 +185,57 @@ void testVPArg2(const __vector_pair vp, int *ptr) { // expected-error {{invalid
|
|||
*vpp = vp;
|
||||
}
|
||||
|
||||
// Pointer-to-__vector_pair parameter: copies the pointed-to pair into the
// memory at 'ptr'. No diagnostic annotation is attached.
void testVPArg3(__vector_pair *vp, int *ptr) {
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
*vpp = *vp;
|
||||
}
|
||||
|
||||
// Const pointer to const __vector_pair parameter: copies the pointed-to pair
// into the memory at 'ptr'. No diagnostic annotation is attached.
void testVPArg4(const __vector_pair *const vp, int *ptr) {
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
*vpp = *vp;
|
||||
}
|
||||
|
||||
// Array-of-__vector_pair parameter: copies its first element into the memory
// at 'ptr'. No diagnostic annotation is attached.
void testVPArg5(__vector_pair vpa[], int *ptr) {
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
*vpp = vpa[0];
|
||||
}
|
||||
|
||||
// By-value parameter of type const vp_t (the typedef of __vector_pair); the
// signature line is annotated with the 'invalid use of PPC MMA type' error.
void testVPArg6(const vp_t vp, int *ptr) { // expected-error {{invalid use of PPC MMA type}}
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
*vpp = vp;
|
||||
}
|
||||
|
||||
// Pointer to const vp_t (the typedef of __vector_pair) parameter: copies the
// pointed-to pair into the memory at 'ptr'. No diagnostic annotation is
// attached.
void testVPArg7(const vp_t *vp, int *ptr) {
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
*vpp = *vp;
|
||||
}
|
||||
|
||||
// function return
|
||||
// Returns a __vector_pair by value; both the signature line and the return
// statement are annotated with the 'invalid use of PPC MMA type' error.
__vector_pair testVPRet1(int *ptr) { // expected-error {{invalid use of PPC MMA type}}
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
return *vpp; // expected-error {{invalid use of PPC MMA type}}
|
||||
}
|
||||
|
||||
// Returns a __vector_pair pointer ('ptr' reinterpreted and advanced by two
// elements). No diagnostic annotation is attached.
__vector_pair *testVPRet2(int *ptr) {
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
return vpp + 2;
|
||||
}
|
||||
|
||||
// Returns a pointer to const __vector_pair ('ptr' reinterpreted and advanced
// by two elements). No diagnostic annotation is attached.
const __vector_pair *testVPRet3(int *ptr) {
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
return vpp + 2;
|
||||
}
|
||||
|
||||
// Returns a const vp_t (the typedef of __vector_pair) by value; both the
// signature line and the return statement are annotated with the
// 'invalid use of PPC MMA type' error.
const vp_t testVPRet4(int *ptr) { // expected-error {{invalid use of PPC MMA type}}
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
return *vpp; // expected-error {{invalid use of PPC MMA type}}
|
||||
}
|
||||
|
||||
// Returns a pointer to const vp_t ('ptr' reinterpreted as a __vector_pair
// pointer and advanced by two elements). No diagnostic annotation is attached.
const vp_t *testVPRet5(int *ptr) {
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
return vpp + 2;
|
||||
}
|
||||
|
||||
// global
|
||||
__vector_pair globalvp; // expected-error {{invalid use of PPC MMA type}}
|
||||
const __vector_pair globalvp2; // expected-error {{invalid use of PPC MMA type}}
|
||||
|
@ -142,6 +243,19 @@ __vector_pair *globalvpp;
|
|||
const __vector_pair *const globalvpp2;
|
||||
vp_t globalvp_t; // expected-error {{invalid use of PPC MMA type}}
|
||||
|
||||
// local
|
||||
// Exercises __vector_pair local variables: one initialized from a dereferenced
// pointer, one passed by address to __builtin_vsx_assemble_pair and
// __builtin_vsx_build_pair, and one passed by value to __builtin_mma_xvf64ger
// before being stored back through 'vpp'. No diagnostic annotation is attached
// to any line.
void testVPLocal(int *ptr, vector unsigned char vc) {
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
// Local initialized by copying from memory.
__vector_pair vp1 = *vpp;
|
||||
// Local declared without an initializer, then handed to two builtins by address.
__vector_pair vp2;
|
||||
__builtin_vsx_assemble_pair(&vp2, vc, vc);
|
||||
__builtin_vsx_build_pair(&vp2, vc, vc);
|
||||
// NOTE(review): vp3 is read below without ever being assigned; presumably
// harmless because this file appears to exercise type checking only —
// confirm against the RUN line, which is not visible here.
__vector_pair vp3;
|
||||
__vector_quad vq;
|
||||
__builtin_mma_xvf64ger(&vq, vp3, vc);
|
||||
*vpp = vp3;
|
||||
}
|
||||
|
||||
// struct field
|
||||
struct TestVPStruct {
|
||||
int a;
|
||||
|
@ -150,6 +264,17 @@ struct TestVPStruct {
|
|||
__vector_pair *vp;
|
||||
};
|
||||
|
||||
// sizeof / alignof
|
||||
// Applies sizeof and __alignof__ to __vector_pair, both as a type and as a
// local variable, and returns the sum of the four results. No diagnostic
// annotation is attached.
int testVPSizeofAlignof(int *ptr) {
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
__vector_pair vp = *vpp;
|
||||
// Queries on the type itself.
unsigned sizet = sizeof(__vector_pair);
|
||||
unsigned alignt = __alignof__(__vector_pair);
|
||||
// Queries on the local variable.
unsigned sizev = sizeof(vp);
|
||||
unsigned alignv = __alignof__(vp);
|
||||
return sizet + alignt + sizev + alignv;
|
||||
}
|
||||
|
||||
// operators
|
||||
int testVPOperators1(int *ptr) {
|
||||
__vector_pair *vpp = (__vector_pair *)ptr;
|
||||
|
@ -217,7 +342,17 @@ void testRestrictQualifiedPointer1(int *__restrict acc) {
|
|||
__builtin_mma_disassemble_acc(arr, acc); // expected-error {{passing 'int *restrict' to parameter of incompatible type '__vector_quad *'}}
|
||||
}
|
||||
|
||||
// Passes a __restrict-qualified __vector_quad pointer as the second argument
// of __builtin_mma_disassemble_acc. No diagnostic annotation is attached, so
// the qualifier is presumably meant to be accepted here.
void testRestrictQualifiedPointer2(__vector_quad *__restrict acc) {
|
||||
vector float arr[4];
|
||||
__builtin_mma_disassemble_acc(arr, acc);
|
||||
}
|
||||
|
||||
// Passes a __volatile-qualified int pointer as the second argument of
// __builtin_mma_disassemble_acc; the call line is annotated with an
// incompatible-pointer-type error naming the parameter type '__vector_quad *'.
void testVolatileQualifiedPointer1(int *__volatile acc) {
|
||||
vector float arr[4];
|
||||
__builtin_mma_disassemble_acc(arr, acc); // expected-error {{passing 'int *volatile' to parameter of incompatible type '__vector_quad *'}}
|
||||
}
|
||||
|
||||
// Passes a __volatile-qualified __vector_quad pointer as the second argument
// of __builtin_mma_disassemble_acc. No diagnostic annotation is attached, so
// the qualifier is presumably meant to be accepted here.
void testVolatileQualifiedPointer2(__vector_quad *__volatile acc) {
|
||||
vector float arr[4];
|
||||
__builtin_mma_disassemble_acc(arr, acc);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue