forked from OSchip/llvm-project
All MMX shift instructions took a <2 x i32> vector as the shift amount parameter. Change this to be <1 x i64> instead, which matches the assembler instruction.
llvm-svn: 45027
This commit is contained in:
parent
c3286e9cef
commit
f924f34b6b
|
@ -767,30 +767,30 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
// Shift left logical
|
||||
def int_x86_mmx_psll_w : GCCBuiltin<"__builtin_ia32_psllw">,
|
||||
Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
|
||||
llvm_v2i32_ty], [IntrNoMem]>;
|
||||
llvm_v1i64_ty], [IntrNoMem]>;
|
||||
def int_x86_mmx_psll_d : GCCBuiltin<"__builtin_ia32_pslld">,
|
||||
Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
|
||||
llvm_v2i32_ty], [IntrNoMem]>;
|
||||
llvm_v1i64_ty], [IntrNoMem]>;
|
||||
def int_x86_mmx_psll_q : GCCBuiltin<"__builtin_ia32_psllq">,
|
||||
Intrinsic<[llvm_v1i64_ty, llvm_v1i64_ty,
|
||||
llvm_v2i32_ty], [IntrNoMem]>;
|
||||
llvm_v1i64_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_mmx_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw">,
|
||||
Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
|
||||
llvm_v2i32_ty], [IntrNoMem]>;
|
||||
llvm_v1i64_ty], [IntrNoMem]>;
|
||||
def int_x86_mmx_psrl_d : GCCBuiltin<"__builtin_ia32_psrld">,
|
||||
Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
|
||||
llvm_v2i32_ty], [IntrNoMem]>;
|
||||
llvm_v1i64_ty], [IntrNoMem]>;
|
||||
def int_x86_mmx_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq">,
|
||||
Intrinsic<[llvm_v1i64_ty, llvm_v1i64_ty,
|
||||
llvm_v2i32_ty], [IntrNoMem]>;
|
||||
llvm_v1i64_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_mmx_psra_w : GCCBuiltin<"__builtin_ia32_psraw">,
|
||||
Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
|
||||
llvm_v2i32_ty], [IntrNoMem]>;
|
||||
llvm_v1i64_ty], [IntrNoMem]>;
|
||||
def int_x86_mmx_psra_d : GCCBuiltin<"__builtin_ia32_psrad">,
|
||||
Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
|
||||
llvm_v2i32_ty], [IntrNoMem]>;
|
||||
llvm_v1i64_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Pack ops.
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/AutoUpgrade.h"
|
||||
#include "llvm/Constants.h"
|
||||
#include "llvm/Function.h"
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/Instructions.h"
|
||||
|
@ -109,6 +110,39 @@ static Function* UpgradeIntrinsicFunction1(Function *F) {
|
|||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
case 'x':
|
||||
// This fixes all MMX shift intrinsic instructions to take a
|
||||
// v1i64 instead of a v2i32 as the second parameter.
|
||||
if (Name.compare(5,10,"x86.mmx.ps",10) == 0 &&
|
||||
(Name.compare(13,4,"psll", 4) == 0 ||
|
||||
Name.compare(13,4,"psra", 4) == 0 ||
|
||||
Name.compare(13,4,"psrl", 4) == 0)) {
|
||||
|
||||
const llvm::Type *VT = VectorType::get(IntegerType::get(64), 1);
|
||||
|
||||
// We don't have to do anything if the parameter already has
|
||||
// the correct type.
|
||||
if (FTy->getParamType(1) == VT)
|
||||
break;
|
||||
|
||||
// We first need to change the name of the old (bad) intrinsic, because
|
||||
// its type is incorrect, but we cannot overload that name. We
|
||||
// arbitrarily unique it here allowing us to construct a correctly named
|
||||
// and typed function below.
|
||||
F->setName("");
|
||||
|
||||
assert(FTy->getNumParams() == 2 && "MMX shift intrinsics take 2 args!");
|
||||
|
||||
// Now construct the new intrinsic with the correct name and type. We
|
||||
// leave the old function around in order to query its type, whatever it
|
||||
// may be, and correctly convert up to the new type.
|
||||
return cast<Function>(M->getOrInsertFunction(Name,
|
||||
FTy->getReturnType(),
|
||||
FTy->getParamType(0),
|
||||
VT,
|
||||
(Type *)0));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -141,6 +175,40 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||
|
||||
switch(NewFn->getIntrinsicID()) {
|
||||
default: assert(0 && "Unknown function for CallInst upgrade.");
|
||||
case Intrinsic::x86_mmx_psll_d:
|
||||
case Intrinsic::x86_mmx_psll_q:
|
||||
case Intrinsic::x86_mmx_psll_w:
|
||||
case Intrinsic::x86_mmx_psra_d:
|
||||
case Intrinsic::x86_mmx_psra_w:
|
||||
case Intrinsic::x86_mmx_psrl_d:
|
||||
case Intrinsic::x86_mmx_psrl_q:
|
||||
case Intrinsic::x86_mmx_psrl_w: {
|
||||
SmallVector<Value*, 2> Operands;
|
||||
|
||||
Operands.push_back(CI->getOperand(1));
|
||||
|
||||
// Cast the second parameter to the correct type.
|
||||
BitCastInst *BC = new BitCastInst(CI->getOperand(2),
|
||||
NewFn->getFunctionType()->getParamType(1),
|
||||
"upgraded", CI);
|
||||
Operands.push_back(BC);
|
||||
|
||||
// Construct a new CallInst
|
||||
CallInst *NewCI = new CallInst(NewFn, Operands.begin(), Operands.end(),
|
||||
"upgraded."+CI->getName(), CI);
|
||||
NewCI->setTailCall(CI->isTailCall());
|
||||
NewCI->setCallingConv(CI->getCallingConv());
|
||||
|
||||
// Handle any uses of the old CallInst.
|
||||
if (!CI->use_empty())
|
||||
// Replace all uses of the old call with the new cast which has the
|
||||
// correct type.
|
||||
CI->replaceAllUsesWith(NewCI);
|
||||
|
||||
// Clean up the old call now that it has been completely upgraded.
|
||||
CI->eraseFromParent();
|
||||
break;
|
||||
}
|
||||
case Intrinsic::ctlz:
|
||||
case Intrinsic::ctpop:
|
||||
case Intrinsic::cttz:
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
; RUN: not grep {llvm\\.part\\.select\\.i\[0-9\]*\\.i\[0-9\]*}
|
||||
; RUN: llvm-as < %s | llvm-dis | \
|
||||
; RUN: not grep {llvm\\.bswap\\.i\[0-9\]*\\.i\[0-9\]*}
|
||||
; RUN: llvm-as < %s | llvm-dis | \
|
||||
; RUN: grep {llvm\\.x86\\.mmx\\.ps} | grep {2 x i32> | count 6
|
||||
|
||||
declare i32 @llvm.ctpop.i28(i28 %val)
|
||||
declare i32 @llvm.cttz.i29(i29 %val)
|
||||
|
@ -50,3 +52,30 @@ define i32 @test_bswap(i32 %A, i16 %B) {
|
|||
ret i32 %d
|
||||
}
|
||||
|
||||
declare <4 x i16> @llvm.x86.mmx.psra.w(<4 x i16>, <2 x i32>) nounwind readnone
|
||||
declare <4 x i16> @llvm.x86.mmx.psll.w(<4 x i16>, <2 x i32>) nounwind readnone
|
||||
declare <4 x i16> @llvm.x86.mmx.psrl.w(<4 x i16>, <2 x i32>) nounwind readnone
|
||||
define void @sh16(<4 x i16> %A, <2 x i32> %B) {
|
||||
%r1 = call <4 x i16> @llvm.x86.mmx.psra.w( <4 x i16> %A, <2 x i32> %B ) ; <<4 x i16>> [#uses=0]
|
||||
%r2 = call <4 x i16> @llvm.x86.mmx.psll.w( <4 x i16> %A, <2 x i32> %B ) ; <<4 x i16>> [#uses=0]
|
||||
%r3 = call <4 x i16> @llvm.x86.mmx.psrl.w( <4 x i16> %A, <2 x i32> %B ) ; <<4 x i16>> [#uses=0]
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32>, <2 x i32>) nounwind readnone
|
||||
declare <2 x i32> @llvm.x86.mmx.psll.d(<2 x i32>, <2 x i32>) nounwind readnone
|
||||
declare <2 x i32> @llvm.x86.mmx.psrl.d(<2 x i32>, <2 x i32>) nounwind readnone
|
||||
define void @sh32(<2 x i32> %A, <2 x i32> %B) {
|
||||
%r1 = call <2 x i32> @llvm.x86.mmx.psra.d( <2 x i32> %A, <2 x i32> %B ) ; <<2 x i32>> [#uses=0]
|
||||
%r2 = call <2 x i32> @llvm.x86.mmx.psll.d( <2 x i32> %A, <2 x i32> %B ) ; <<2 x i32>> [#uses=0]
|
||||
%r3 = call <2 x i32> @llvm.x86.mmx.psrl.d( <2 x i32> %A, <2 x i32> %B ) ; <<2 x i32>> [#uses=0]
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <2 x i32>) nounwind readnone
|
||||
declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <2 x i32>) nounwind readnone
|
||||
define void @sh64(<1 x i64> %A, <2 x i32> %B) {
|
||||
%r1 = call <1 x i64> @llvm.x86.mmx.psll.q( <1 x i64> %A, <2 x i32> %B ) ; <<1 x i64>> [#uses=0]
|
||||
%r2 = call <1 x i64> @llvm.x86.mmx.psrl.q( <1 x i64> %A, <2 x i32> %B ) ; <<1 x i64>> [#uses=0]
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue