forked from OSchip/llvm-project
Simplify strlen to a subtraction for certain cases.
Patch by Li Huang (li1.huang@intel.com) Differential Revision: http://reviews.llvm.org/D18230 llvm-svn: 266200
This commit is contained in:
parent
4fb7f509fd
commit
752c1448fe
|
@ -26,6 +26,7 @@ namespace llvm {
|
|||
class AssumptionCache;
|
||||
class DataLayout;
|
||||
class DominatorTree;
|
||||
class GEPOperator;
|
||||
class Instruction;
|
||||
class Loop;
|
||||
class LoopInfo;
|
||||
|
@ -183,6 +184,10 @@ namespace llvm {
|
|||
DL);
|
||||
}
|
||||
|
||||
/// Returns true if the GEP is based on a pointer to a string (array of i8),
|
||||
/// and is indexing into this string.
|
||||
bool isGEPBasedOnPointerToString(const GEPOperator *GEP);
|
||||
|
||||
/// getConstantStringInfo - This function computes the length of a
|
||||
/// null-terminated C string pointed to by V. If successful, it returns true
|
||||
/// and returns the string in Str. If unsuccessful, it returns false. This
|
||||
|
|
|
@ -2650,6 +2650,24 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
|
|||
return Ptr;
|
||||
}
|
||||
|
||||
bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP) {
|
||||
// Make sure the GEP has exactly three arguments.
|
||||
if (GEP->getNumOperands() != 3)
|
||||
return false;
|
||||
|
||||
// Make sure the index-ee is a pointer to array of i8.
|
||||
ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
|
||||
if (!AT || !AT->getElementType()->isIntegerTy(8))
|
||||
return false;
|
||||
|
||||
// Check to make sure that the first operand of the GEP is an integer and
|
||||
// has value 0 so that we are sure we're indexing into the initializer.
|
||||
const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
|
||||
if (!FirstIdx || !FirstIdx->isZero())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// This function computes the length of a null-terminated C string pointed to
|
||||
/// by V. If successful, it returns true and returns the string in Str.
|
||||
|
@ -2664,19 +2682,9 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
|
|||
// If the value is a GEP instruction or constant expression, treat it as an
|
||||
// offset.
|
||||
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
|
||||
// Make sure the GEP has exactly three arguments.
|
||||
if (GEP->getNumOperands() != 3)
|
||||
return false;
|
||||
|
||||
// Make sure the index-ee is a pointer to array of i8.
|
||||
ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
|
||||
if (!AT || !AT->getElementType()->isIntegerTy(8))
|
||||
return false;
|
||||
|
||||
// Check to make sure that the first operand of the GEP is an integer and
|
||||
// has value 0 so that we are sure we're indexing into the initializer.
|
||||
const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
|
||||
if (!FirstIdx || !FirstIdx->isZero())
|
||||
// The GEP operator should be based on a pointer to string constant, and is
|
||||
// indexing into the string constant.
|
||||
if (!isGEPBasedOnPointerToString(GEP))
|
||||
return false;
|
||||
|
||||
// If the second index isn't a ConstantInt, then this is a variable index
|
||||
|
|
|
@ -535,6 +535,57 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
|
|||
if (uint64_t Len = GetStringLength(Src))
|
||||
return ConstantInt::get(CI->getType(), Len - 1);
|
||||
|
||||
// If s is a constant pointer pointing to a string literal, we can fold
|
||||
// strlen(s + x) to strlen(s) - x, when x is known to be in the range
|
||||
// [0, strlen(s)] or the string has a single null terminator '\0' at the end.
|
||||
// We only try to simplify strlen when the pointer s points to an array
|
||||
// of i8. Otherwise, we would need to scale the offset x before doing the
|
||||
// subtraction. This will make the optimization more complex, and it's not
|
||||
// very useful because calling strlen for a pointer of other types is
|
||||
// very uncommon.
|
||||
if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) {
|
||||
if (!isGEPBasedOnPointerToString(GEP))
|
||||
return nullptr;
|
||||
|
||||
StringRef Str;
|
||||
if (getConstantStringInfo(GEP->getOperand(0), Str, 0, false)) {
|
||||
size_t NullTermIdx = Str.find('\0');
|
||||
|
||||
// If the string does not have '\0', leave it to strlen to compute
|
||||
// its length.
|
||||
if (NullTermIdx == StringRef::npos)
|
||||
return nullptr;
|
||||
|
||||
Value *Offset = GEP->getOperand(2);
|
||||
unsigned BitWidth = Offset->getType()->getIntegerBitWidth();
|
||||
APInt KnownZero(BitWidth, 0);
|
||||
APInt KnownOne(BitWidth, 0);
|
||||
computeKnownBits(Offset, KnownZero, KnownOne, DL, 0, nullptr, CI,
|
||||
nullptr);
|
||||
KnownZero.flipAllBits();
|
||||
size_t ArrSize =
|
||||
cast<ArrayType>(GEP->getSourceElementType())->getNumElements();
|
||||
|
||||
// KnownZero's bits are flipped, so zeros in KnownZero now represent
|
||||
// bits known to be zeros in Offset, and ones in KnowZero represent
|
||||
// bits unknown in Offset. Therefore, Offset is known to be in range
|
||||
// [0, NullTermIdx] when the flipped KnownZero is non-negative and
|
||||
// unsigned-less-than NullTermIdx.
|
||||
//
|
||||
// If Offset is not provably in the range [0, NullTermIdx], we can still
|
||||
// optimize if we can prove that the program has undefined behavior when
|
||||
// Offset is outside that range. That is the case when GEP->getOperand(0)
|
||||
// is a pointer to an object whose memory extent is NullTermIdx+1.
|
||||
if ((KnownZero.isNonNegative() && KnownZero.ule(NullTermIdx)) ||
|
||||
(GEP->isInBounds() && isa<GlobalVariable>(GEP->getOperand(0)) &&
|
||||
NullTermIdx == ArrSize - 1))
|
||||
return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
|
||||
Offset);
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// strlen(x?"foo":"bars") --> x ? 3 : 4
|
||||
if (SelectInst *SI = dyn_cast<SelectInst>(Src)) {
|
||||
uint64_t LenTrue = GetStringLength(SI->getTrueValue());
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; NOTE: Assertions have been autogenerated by update_test_checks.py
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; Test that the strlen library call simplifier works correctly.
|
||||
;
|
||||
; RUN: opt < %s -instcombine -S | FileCheck %s
|
||||
|
@ -113,9 +113,8 @@ define i32 @test_simplify9(i1 %x) {
|
|||
|
||||
define i32 @test_simplify10(i32 %x) {
|
||||
; CHECK-LABEL: @test_simplify10(
|
||||
; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 %x
|
||||
; CHECK-NEXT: [[HELLO_L:%.*]] = call i32 @strlen(i8* [[HELLO_P]])
|
||||
; CHECK-NEXT: ret i32 [[HELLO_L]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 5, %x
|
||||
; CHECK-NEXT: ret i32 [[TMP1]]
|
||||
;
|
||||
%hello_p = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 %x
|
||||
%hello_l = call i32 @strlen(i8* %hello_p)
|
||||
|
@ -127,9 +126,8 @@ define i32 @test_simplify10(i32 %x) {
|
|||
define i32 @test_simplify11(i32 %x) {
|
||||
; CHECK-LABEL: @test_simplify11(
|
||||
; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 7
|
||||
; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds [13 x i8], [13 x i8]* @null_hello_mid, i32 0, i32 [[AND]]
|
||||
; CHECK-NEXT: [[HELLO_L:%.*]] = call i32 @strlen(i8* [[HELLO_P]])
|
||||
; CHECK-NEXT: ret i32 [[HELLO_L]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i32 9, [[AND]]
|
||||
; CHECK-NEXT: ret i32 [[TMP1]]
|
||||
;
|
||||
%and = and i32 %x, 7
|
||||
%hello_p = getelementptr inbounds [13 x i8], [13 x i8]* @null_hello_mid, i32 0, i32 %and
|
||||
|
|
Loading…
Reference in New Issue