[InstCombine][SSE4A] Standardized naming on Length (formerly Width) and Index (formerly Start) to match AMD docs. NFCI.

llvm-svn: 243226
This commit is contained in:
Simon Pilgrim 2015-07-25 20:41:00 +00:00
parent 3b1c990dcc
commit 54fcd62c6f
1 changed file with 31 additions and 34 deletions

View File

@ -203,7 +203,7 @@ static Value *SimplifyX86insertps(const IntrinsicInst &II,
if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) { if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
VectorType *VecTy = cast<VectorType>(II.getType()); VectorType *VecTy = cast<VectorType>(II.getType());
assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type"); assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
// The immediate permute control byte looks like this: // The immediate permute control byte looks like this:
// [3:0] - zero mask for each 32-bit lane // [3:0] - zero mask for each 32-bit lane
// [5:4] - select one 32-bit destination lane // [5:4] - select one 32-bit destination lane
@ -248,7 +248,7 @@ static Value *SimplifyX86insertps(const IntrinsicInst &II,
// Replace the selected destination lane with the selected source lane. // Replace the selected destination lane with the selected source lane.
ShuffleMask[DestLane] = SourceLane + 4; ShuffleMask[DestLane] = SourceLane + 4;
} }
return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask); return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
} }
return nullptr; return nullptr;
@ -289,7 +289,7 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II,
// The high bit of the selection field chooses the 1st or 2nd operand. // The high bit of the selection field chooses the 1st or 2nd operand.
bool LowInputSelect = Imm & 0x02; bool LowInputSelect = Imm & 0x02;
bool HighInputSelect = Imm & 0x20; bool HighInputSelect = Imm & 0x20;
// The low bit of the selection field chooses the low or high half // The low bit of the selection field chooses the low or high half
// of the selected operand. // of the selected operand.
bool LowHalfSelect = Imm & 0x01; bool LowHalfSelect = Imm & 0x01;
@ -298,11 +298,11 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II,
// Determine which operand(s) are actually in use for this instruction. // Determine which operand(s) are actually in use for this instruction.
Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0); Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0); Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
// If needed, replace operands based on zero mask. // If needed, replace operands based on zero mask.
V0 = LowHalfZero ? ZeroVector : V0; V0 = LowHalfZero ? ZeroVector : V0;
V1 = HighHalfZero ? ZeroVector : V1; V1 = HighHalfZero ? ZeroVector : V1;
// Permute low half of result. // Permute low half of result.
unsigned StartIndex = LowHalfSelect ? HalfSize : 0; unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
for (unsigned i = 0; i < HalfSize; ++i) for (unsigned i = 0; i < HalfSize; ++i)
@ -801,26 +801,27 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (Value *V = SimplifyX86insertps(*II, *Builder)) if (Value *V = SimplifyX86insertps(*II, *Builder))
return ReplaceInstUsesWith(*II, V); return ReplaceInstUsesWith(*II, V);
break; break;
case Intrinsic::x86_sse4a_insertqi: { case Intrinsic::x86_sse4a_insertqi: {
// insertqi x, y, 64, 0 can just copy y's lower bits and leave the top // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top
// ones undef // ones undef
// TODO: eventually we should lower this intrinsic to IR // TODO: eventually we should lower this intrinsic to IR
if (auto CIWidth = dyn_cast<ConstantInt>(II->getArgOperand(2))) { if (auto CILength = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
if (auto CIStart = dyn_cast<ConstantInt>(II->getArgOperand(3))) { if (auto CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(3))) {
unsigned Index = CIStart->getZExtValue(); unsigned Index = CIIndex->getZExtValue();
// From AMD documentation: "a value of zero in the field length is // From AMD documentation: "a value of zero in the field length is
// defined as length of 64". // defined as length of 64".
unsigned Length = CIWidth->equalsInt(0) ? 64 : CIWidth->getZExtValue(); unsigned Length = CILength->equalsInt(0) ? 64 : CILength->getZExtValue();
// From AMD documentation: "If the sum of the bit index + length field // From AMD documentation: "If the sum of the bit index + length field
// is greater than 64, the results are undefined". // is greater than 64, the results are undefined".
unsigned End = Index + Length;
// Note that both field index and field length are 8-bit quantities. // Note that both field index and field length are 8-bit quantities.
// Since variables 'Index' and 'Length' are unsigned values // Since variables 'Index' and 'Length' are unsigned values
// obtained from zero-extending field index and field length // obtained from zero-extending field index and field length
// respectively, their sum should never wrap around. // respectively, their sum should never wrap around.
if ((Index + Length) > 64) if (End > 64)
return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
if (Length == 64 && Index == 0) { if (Length == 64 && Index == 0) {
@ -832,7 +833,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Builder->CreateShuffleVector( Builder->CreateShuffleVector(
Vec, Undef, ConstantDataVector::get( Vec, Undef, ConstantDataVector::get(
II->getContext(), makeArrayRef(Mask)))); II->getContext(), makeArrayRef(Mask))));
} else if (auto Source = } else if (auto Source =
dyn_cast<IntrinsicInst>(II->getArgOperand(0))) { dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
if (Source->hasOneUse() && if (Source->hasOneUse() &&
@ -840,37 +840,34 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// If the source of the insert has only one use and it's another // If the source of the insert has only one use and it's another
// insert (and they're both inserting from the same vector), try to // insert (and they're both inserting from the same vector), try to
// bundle both together. // bundle both together.
auto CISourceWidth = auto CISourceLength =
dyn_cast<ConstantInt>(Source->getArgOperand(2)); dyn_cast<ConstantInt>(Source->getArgOperand(2));
auto CISourceStart = auto CISourceIndex =
dyn_cast<ConstantInt>(Source->getArgOperand(3)); dyn_cast<ConstantInt>(Source->getArgOperand(3));
if (CISourceStart && CISourceWidth) { if (CISourceIndex && CISourceLength) {
unsigned Start = CIStart->getZExtValue(); unsigned SourceIndex = CISourceIndex->getZExtValue();
unsigned Width = CIWidth->getZExtValue(); unsigned SourceLength = CISourceLength->getZExtValue();
unsigned End = Start + Width; unsigned SourceEnd = SourceIndex + SourceLength;
unsigned SourceStart = CISourceStart->getZExtValue(); unsigned NewIndex, NewLength;
unsigned SourceWidth = CISourceWidth->getZExtValue();
unsigned SourceEnd = SourceStart + SourceWidth;
unsigned NewStart, NewWidth;
bool ShouldReplace = false; bool ShouldReplace = false;
if (Start <= SourceStart && SourceStart <= End) { if (Index <= SourceIndex && SourceIndex <= End) {
NewStart = Start; NewIndex = Index;
NewWidth = std::max(End, SourceEnd) - NewStart; NewLength = std::max(End, SourceEnd) - NewIndex;
ShouldReplace = true; ShouldReplace = true;
} else if (SourceStart <= Start && Start <= SourceEnd) { } else if (SourceIndex <= Index && Index <= SourceEnd) {
NewStart = SourceStart; NewIndex = SourceIndex;
NewWidth = std::max(SourceEnd, End) - NewStart; NewLength = std::max(SourceEnd, End) - NewIndex;
ShouldReplace = true; ShouldReplace = true;
} }
if (ShouldReplace) { if (ShouldReplace) {
Constant *ConstantWidth = ConstantInt::get( Constant *ConstantLength = ConstantInt::get(
II->getArgOperand(2)->getType(), NewWidth, false); II->getArgOperand(2)->getType(), NewLength, false);
Constant *ConstantStart = ConstantInt::get( Constant *ConstantIndex = ConstantInt::get(
II->getArgOperand(3)->getType(), NewStart, false); II->getArgOperand(3)->getType(), NewIndex, false);
Value *Args[4] = { Source->getArgOperand(0), Value *Args[4] = { Source->getArgOperand(0),
II->getArgOperand(1), ConstantWidth, II->getArgOperand(1), ConstantLength,
ConstantStart }; ConstantIndex };
Module *M = CI.getParent()->getParent()->getParent(); Module *M = CI.getParent()->getParent()->getParent();
Value *F = Value *F =
Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi); Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);