forked from OSchip/llvm-project
InterleaveAccessPass: Avoid constructing invalid shuffle masks
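Fix a bug where we would construct shufflevector instructions addressing invalid elements, i.e. elements past the end of the two concatenated input vectors; this could happen when the re-interleave mask contains undefs. Differential Revision: https://reviews.llvm.org/D29313. llvm-svn: 293673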
This commit is contained in:
parent
6342cf9dc3
commit
01fa962226
|
@ -174,7 +174,7 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
|
|||
/// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
|
||||
/// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
|
||||
static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
|
||||
unsigned MaxFactor) {
|
||||
unsigned MaxFactor, unsigned OpNumElts) {
|
||||
unsigned NumElts = Mask.size();
|
||||
if (NumElts < 4)
|
||||
return false;
|
||||
|
@ -246,6 +246,9 @@ static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
|
|||
|
||||
if (StartMask < 0)
|
||||
break;
|
||||
// We must stay within the vectors; This case can happen with undefs.
|
||||
if (StartMask + LaneLen > OpNumElts*2)
|
||||
break;
|
||||
}
|
||||
|
||||
// Found an interleaved mask of current factor.
|
||||
|
@ -406,7 +409,8 @@ bool InterleavedAccess::lowerInterleavedStore(
|
|||
|
||||
// Check if the shufflevector is RE-interleave shuffle.
|
||||
unsigned Factor;
|
||||
if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor))
|
||||
unsigned OpNumElts = SVI->getOperand(0)->getType()->getVectorNumElements();
|
||||
if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts))
|
||||
return false;
|
||||
|
||||
DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
|
||||
|
|
|
@ -547,3 +547,21 @@ define void @store_general_mask_factor3_negativestart(<12 x i32>* %ptr, <32 x i3
|
|||
store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
@g = external global <4 x float>
|
||||
|
||||
; The following does not give a valid interleaved store
|
||||
; NEON-LABEL: define void @no_interleave
|
||||
; NEON-NOT: call void @llvm.aarch64.neon.st2
|
||||
; NEON: shufflevector
|
||||
; NEON: store
|
||||
; NEON: ret void
|
||||
; NO_NEON-LABEL: define void @no_interleave
|
||||
; NO_NEON: shufflevector
|
||||
; NO_NEON: store
|
||||
; NO_NEON: ret void
|
||||
; Regression test for the bounds check in isReInterleaveMask. Read as a
; factor-2 re-interleave mask (lane length 2), lane 0 is <0, 1> and lane 1
; starts at element 7 with an undef tail, so lane 1 would need elements 7 and
; 8. Two <4 x float> operands only provide elements 0-7, so the mask must be
; rejected and the shufflevector + store left as they are.
define void @no_interleave(<4 x float> %a0) {
|
||||
%v0 = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 7, i32 1, i32 undef>
|
||||
store <4 x float> %v0, <4 x float>* @g, align 16
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -626,3 +626,21 @@ define void @store_general_mask_factor3_midstart_pass(<12 x i32>* %ptr, <32 x i3
|
|||
store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
@g = external global <4 x float>
|
||||
|
||||
; The following does not give a valid interleaved store
|
||||
; NEON-LABEL: define void @no_interleave
|
||||
; NEON-NOT: call void @llvm.arm.neon.vst2
|
||||
; NEON: shufflevector
|
||||
; NEON: store
|
||||
; NEON: ret void
|
||||
; NO_NEON-LABEL: define void @no_interleave
|
||||
; NO_NEON: shufflevector
|
||||
; NO_NEON: store
|
||||
; NO_NEON: ret void
|
||||
; Regression test for the bounds check in isReInterleaveMask. Read as a
; factor-2 re-interleave mask (lane length 2), lane 0 is <0, 1> and lane 1
; starts at element 7 with an undef tail, so lane 1 would need elements 7 and
; 8. Two <4 x float> operands only provide elements 0-7, so the mask must be
; rejected and the shufflevector + store left as they are.
define void @no_interleave(<4 x float> %a0) {
|
||||
%v0 = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 7, i32 1, i32 undef>
|
||||
store <4 x float> %v0, <4 x float>* @g, align 16
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue