forked from OSchip/llvm-project
[InstCombine] fold bitcasts around an extractelement (2nd try)
This is a redo of r255124 (reverted at r255126) with an added check for a scalar destination type and an added test for the failure seen in Clang's test/CodeGen/vector.c. The extra test shows a different missing optimization.

Original commit message:

Example:
  bitcast (extractelement (bitcast <2 x float> %X to <2 x i32>), 1) to float
  --->
  extractelement <2 x float> %X, i32 1

This is part of fixing PR25543:
https://llvm.org/bugs/show_bug.cgi?id=25543

The next step will be to generalize this fold:
  trunc ( lshr ( bitcast X) ) -> extractelement (X)

I.e., I'm hoping to replace the existing transform of:
  bitcast ( trunc ( lshr ( bitcast X)))
added by:
  http://reviews.llvm.org/rL112232
with two less-specific transforms to catch the case in the bug report.

Differential Revision: http://reviews.llvm.org/D14879

llvm-svn: 255137
This commit is contained in:
parent
9c54984d43
commit
b67e6b6044
|
@ -1715,6 +1715,42 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Given a bitcasted vector fed into an extract element instruction and then
|
||||||
|
/// bitcasted again to a scalar type, eliminate at least one bitcast by changing
|
||||||
|
/// the vector type of the extractelement instruction.
|
||||||
|
/// Example:
|
||||||
|
/// bitcast (extractelement (bitcast <2 x float> %X to <2 x i32>), 1) to float
|
||||||
|
/// --->
|
||||||
|
/// extractelement <2 x float> %X, i32 1
|
||||||
|
static Instruction *foldBitCastExtElt(BitCastInst &BitCast, InstCombiner &IC,
|
||||||
|
const DataLayout &DL) {
|
||||||
|
Type *DestType = BitCast.getType();
|
||||||
|
if (DestType->isVectorTy())
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
|
// TODO: Create and use a pattern matcher for ExtractElementInst.
|
||||||
|
auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0));
|
||||||
|
if (!ExtElt || !ExtElt->hasOneUse())
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
|
Value *InnerBitCast = nullptr;
|
||||||
|
if (!match(ExtElt->getOperand(0), m_BitCast(m_Value(InnerBitCast))))
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
|
// If the element type of the vector doesn't match the result type,
|
||||||
|
// bitcast it to a vector type that we can extract from.
|
||||||
|
VectorType *VecType = cast<VectorType>(InnerBitCast->getType());
|
||||||
|
if (VecType->getElementType() != DestType) {
|
||||||
|
unsigned VecWidth = VecType->getPrimitiveSizeInBits();
|
||||||
|
unsigned DestWidth = DestType->getPrimitiveSizeInBits();
|
||||||
|
unsigned NumElts = VecWidth / DestWidth;
|
||||||
|
VecType = VectorType::get(DestType, NumElts);
|
||||||
|
InnerBitCast = IC.Builder->CreateBitCast(InnerBitCast, VecType, "bc");
|
||||||
|
}
|
||||||
|
|
||||||
|
return ExtractElementInst::Create(InnerBitCast, ExtElt->getOperand(1));
|
||||||
|
}
|
||||||
|
|
||||||
static Instruction *foldVecTruncToExtElt(Value *VecInput, Type *DestTy,
|
static Instruction *foldVecTruncToExtElt(Value *VecInput, Type *DestTy,
|
||||||
unsigned ShiftAmt, InstCombiner &IC,
|
unsigned ShiftAmt, InstCombiner &IC,
|
||||||
const DataLayout &DL) {
|
const DataLayout &DL) {
|
||||||
|
@ -1886,6 +1922,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (Instruction *I = foldBitCastExtElt(CI, *this, DL))
|
||||||
|
return I;
|
||||||
|
|
||||||
if (SrcTy->isPointerTy())
|
if (SrcTy->isPointerTy())
|
||||||
return commonPointerCastTransforms(CI);
|
return commonPointerCastTransforms(CI);
|
||||||
return commonCastTransforms(CI);
|
return commonCastTransforms(CI);
|
||||||
|
|
|
@ -64,7 +64,7 @@ define float @test3(<2 x float> %A, <2 x i64> %B) {
|
||||||
; CHECK-NEXT: ret float %add
|
; CHECK-NEXT: ret float %add
|
||||||
}
|
}
|
||||||
|
|
||||||
; TODO: Both bitcasts are unnecessary; change the extractelement.
|
; Both bitcasts are unnecessary; change the extractelement.
|
||||||
|
|
||||||
define float @bitcast_extelt1(<2 x float> %A) {
|
define float @bitcast_extelt1(<2 x float> %A) {
|
||||||
%bc1 = bitcast <2 x float> %A to <2 x i32>
|
%bc1 = bitcast <2 x float> %A to <2 x i32>
|
||||||
|
@ -73,13 +73,11 @@ define float @bitcast_extelt1(<2 x float> %A) {
|
||||||
ret float %bc2
|
ret float %bc2
|
||||||
|
|
||||||
; CHECK-LABEL: @bitcast_extelt1(
|
; CHECK-LABEL: @bitcast_extelt1(
|
||||||
; CHECK-NEXT: %bc1 = bitcast <2 x float> %A to <2 x i32>
|
; CHECK-NEXT: %bc2 = extractelement <2 x float> %A, i32 0
|
||||||
; CHECK-NEXT: %ext = extractelement <2 x i32> %bc1, i32 0
|
|
||||||
; CHECK-NEXT: %bc2 = bitcast i32 %ext to float
|
|
||||||
; CHECK-NEXT: ret float %bc2
|
; CHECK-NEXT: ret float %bc2
|
||||||
}
|
}
|
||||||
|
|
||||||
; TODO: Second bitcast can be folded into the first.
|
; Second bitcast can be folded into the first.
|
||||||
|
|
||||||
define i64 @bitcast_extelt2(<4 x float> %A) {
|
define i64 @bitcast_extelt2(<4 x float> %A) {
|
||||||
%bc1 = bitcast <4 x float> %A to <2 x double>
|
%bc1 = bitcast <4 x float> %A to <2 x double>
|
||||||
|
@ -88,12 +86,26 @@ define i64 @bitcast_extelt2(<4 x float> %A) {
|
||||||
ret i64 %bc2
|
ret i64 %bc2
|
||||||
|
|
||||||
; CHECK-LABEL: @bitcast_extelt2(
|
; CHECK-LABEL: @bitcast_extelt2(
|
||||||
; CHECK-NEXT: %bc1 = bitcast <4 x float> %A to <2 x double>
|
; CHECK-NEXT: %bc = bitcast <4 x float> %A to <2 x i64>
|
||||||
; CHECK-NEXT: %ext = extractelement <2 x double> %bc1, i32 1
|
; CHECK-NEXT: %bc2 = extractelement <2 x i64> %bc, i32 1
|
||||||
; CHECK-NEXT: %bc2 = bitcast double %ext to i64
|
|
||||||
; CHECK-NEXT: ret i64 %bc2
|
; CHECK-NEXT: ret i64 %bc2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
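; TODO: This should return %A. The bitcast -> extractelement -> bitcast chain only round-trips %A through <1 x i64>, so it is a no-op.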
|
||||||
|
|
||||||
|
define <2 x i32> @bitcast_extelt3(<2 x i32> %A) {
|
||||||
|
%bc1 = bitcast <2 x i32> %A to <1 x i64>
|
||||||
|
%ext = extractelement <1 x i64> %bc1, i32 0
|
||||||
|
%bc2 = bitcast i64 %ext to <2 x i32>
|
||||||
|
ret <2 x i32> %bc2
|
||||||
|
|
||||||
|
; CHECK-LABEL: @bitcast_extelt3(
|
||||||
|
; CHECK-NEXT: %bc1 = bitcast <2 x i32> %A to <1 x i64>
|
||||||
|
; CHECK-NEXT: %ext = extractelement <1 x i64> %bc1, i32 0
|
||||||
|
; CHECK-NEXT: %bc2 = bitcast i64 %ext to <2 x i32>
|
||||||
|
; CHECK-NEXT: ret <2 x i32> %bc2
|
||||||
|
}
|
||||||
|
|
||||||
define <2 x i32> @test4(i32 %A, i32 %B){
|
define <2 x i32> @test4(i32 %A, i32 %B){
|
||||||
%tmp38 = zext i32 %A to i64
|
%tmp38 = zext i32 %A to i64
|
||||||
%tmp32 = zext i32 %B to i64
|
%tmp32 = zext i32 %B to i64
|
||||||
|
|
Loading…
Reference in New Issue