forked from OSchip/llvm-project
[LibCallSimplifier] try harder to fold memcmp with constant arguments (2nd try)
The 1st try was reverted because it could loop infinitely by creating a dead instruction. That is now fixed, and a test case has been added to verify it. Original commit message: Try to fold: memcmp(X, C, ConstantLength) == 0 --> load X == *C. Without this change, we're unnecessarily checking the alignment of the constant data, so we miss the transform in the first 2 tests in the patch. I noted this shortcoming of LibCallSimplifier in one of the recent CGP memcmp expansion patches. This doesn't help the example in https://bugs.llvm.org/show_bug.cgi?id=34032#c13 directly, but it's worth short-circuiting more of these simple cases since we're already trying to do that. The benefit of transforming to load+cmp is that existing IR analysis/transforms may further simplify that code. For example, if the load of the variable is common to multiple memcmp calls, CSE can remove the duplicate instructions. Differential Revision: https://reviews.llvm.org/D36922 llvm-svn: 311366
This commit is contained in:
parent
d986545df6
commit
82ec872990
|
@ -18,6 +18,7 @@
|
||||||
#include "llvm/ADT/SmallString.h"
|
#include "llvm/ADT/SmallString.h"
|
||||||
#include "llvm/ADT/StringMap.h"
|
#include "llvm/ADT/StringMap.h"
|
||||||
#include "llvm/ADT/Triple.h"
|
#include "llvm/ADT/Triple.h"
|
||||||
|
#include "llvm/Analysis/ConstantFolding.h"
|
||||||
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
|
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
|
||||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||||
#include "llvm/Analysis/ValueTracking.h"
|
#include "llvm/Analysis/ValueTracking.h"
|
||||||
|
@ -751,29 +752,44 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0
|
// memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0
|
||||||
|
// TODO: The case where both inputs are constants does not need to be limited
|
||||||
|
// to legal integers or equality comparison. See block below this.
|
||||||
if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) {
|
if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) {
|
||||||
|
|
||||||
IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8);
|
IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8);
|
||||||
unsigned PrefAlignment = DL.getPrefTypeAlignment(IntType);
|
unsigned PrefAlignment = DL.getPrefTypeAlignment(IntType);
|
||||||
|
|
||||||
if (getKnownAlignment(LHS, DL, CI) >= PrefAlignment &&
|
// First, see if we can fold either argument to a constant.
|
||||||
getKnownAlignment(RHS, DL, CI) >= PrefAlignment) {
|
Value *LHSV = nullptr;
|
||||||
|
if (auto *LHSC = dyn_cast<Constant>(LHS)) {
|
||||||
Type *LHSPtrTy =
|
LHSC = ConstantExpr::getBitCast(LHSC, IntType->getPointerTo());
|
||||||
IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());
|
LHSV = ConstantFoldLoadFromConstPtr(LHSC, IntType, DL);
|
||||||
Type *RHSPtrTy =
|
}
|
||||||
IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
|
Value *RHSV = nullptr;
|
||||||
|
if (auto *RHSC = dyn_cast<Constant>(RHS)) {
|
||||||
Value *LHSV =
|
RHSC = ConstantExpr::getBitCast(RHSC, IntType->getPointerTo());
|
||||||
B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv");
|
RHSV = ConstantFoldLoadFromConstPtr(RHSC, IntType, DL);
|
||||||
Value *RHSV =
|
}
|
||||||
B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv");
|
|
||||||
|
|
||||||
|
// Don't generate unaligned loads. If either source is constant data,
|
||||||
|
// alignment doesn't matter for that source because there is no load.
|
||||||
|
if ((LHSV || getKnownAlignment(LHS, DL, CI) >= PrefAlignment) &&
|
||||||
|
(RHSV || getKnownAlignment(RHS, DL, CI) >= PrefAlignment)) {
|
||||||
|
if (!LHSV) {
|
||||||
|
Type *LHSPtrTy =
|
||||||
|
IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());
|
||||||
|
LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy), "lhsv");
|
||||||
|
}
|
||||||
|
if (!RHSV) {
|
||||||
|
Type *RHSPtrTy =
|
||||||
|
IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
|
||||||
|
RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy), "rhsv");
|
||||||
|
}
|
||||||
return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
|
return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
|
// Constant folding: memcmp(x, y, Len) -> constant (all arguments are const).
|
||||||
|
// TODO: This is limited to i8 arrays.
|
||||||
StringRef LHSStr, RHSStr;
|
StringRef LHSStr, RHSStr;
|
||||||
if (getConstantStringInfo(LHS, LHSStr) &&
|
if (getConstantStringInfo(LHS, LHSStr) &&
|
||||||
getConstantStringInfo(RHS, RHSStr)) {
|
getConstantStringInfo(RHS, RHSStr)) {
|
||||||
|
|
|
@ -3,31 +3,45 @@
|
||||||
|
|
||||||
declare i32 @memcmp(i8*, i8*, i64)
|
declare i32 @memcmp(i8*, i8*, i64)
|
||||||
|
|
||||||
; TODO: The alignment of this constant does not matter. We constant fold the load.
|
; The alignment of this constant does not matter. We constant fold the load.
|
||||||
|
|
||||||
@charbuf = private unnamed_addr constant [4 x i8] [i8 0, i8 0, i8 0, i8 1], align 1
|
@charbuf = private unnamed_addr constant [4 x i8] [i8 0, i8 0, i8 0, i8 1], align 1
|
||||||
|
|
||||||
define i1 @memcmp_4bytes_unaligned_constant_i8(i8* align 4 %x) {
|
define i1 @memcmp_4bytes_unaligned_constant_i8(i8* align 4 %x) {
|
||||||
; ALL-LABEL: @memcmp_4bytes_unaligned_constant_i8(
|
; LE-LABEL: @memcmp_4bytes_unaligned_constant_i8(
|
||||||
; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @charbuf, i64 0, i64 0), i64 4)
|
; LE-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i32*
|
||||||
; ALL-NEXT: [[CMPEQ0:%.*]] = icmp eq i32 [[CALL]], 0
|
; LE-NEXT: [[LHSV:%.*]] = load i32, i32* [[TMP1]], align 4
|
||||||
; ALL-NEXT: ret i1 [[CMPEQ0]]
|
; LE-NEXT: [[TMP2:%.*]] = icmp eq i32 [[LHSV]], 16777216
|
||||||
|
; LE-NEXT: ret i1 [[TMP2]]
|
||||||
|
;
|
||||||
|
; BE-LABEL: @memcmp_4bytes_unaligned_constant_i8(
|
||||||
|
; BE-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i32*
|
||||||
|
; BE-NEXT: [[LHSV:%.*]] = load i32, i32* [[TMP1]], align 4
|
||||||
|
; BE-NEXT: [[TMP2:%.*]] = icmp eq i32 [[LHSV]], 1
|
||||||
|
; BE-NEXT: ret i1 [[TMP2]]
|
||||||
;
|
;
|
||||||
%call = tail call i32 @memcmp(i8* %x, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @charbuf, i64 0, i64 0), i64 4)
|
%call = tail call i32 @memcmp(i8* %x, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @charbuf, i64 0, i64 0), i64 4)
|
||||||
%cmpeq0 = icmp eq i32 %call, 0
|
%cmpeq0 = icmp eq i32 %call, 0
|
||||||
ret i1 %cmpeq0
|
ret i1 %cmpeq0
|
||||||
}
|
}
|
||||||
|
|
||||||
; TODO: We still don't care about alignment of the constant. We are not limited to constant folding only i8 arrays.
|
; We still don't care about alignment of the constant. We are not limited to constant folding only i8 arrays.
|
||||||
; It doesn't matter if the constant operand is the first operand to the memcmp.
|
; It doesn't matter if the constant operand is the first operand to the memcmp.
|
||||||
|
|
||||||
@intbuf_unaligned = private unnamed_addr constant [4 x i16] [i16 1, i16 2, i16 3, i16 4], align 1
|
@intbuf_unaligned = private unnamed_addr constant [4 x i16] [i16 1, i16 2, i16 3, i16 4], align 1
|
||||||
|
|
||||||
define i1 @memcmp_4bytes_unaligned_constant_i16(i8* align 4 %x) {
|
define i1 @memcmp_4bytes_unaligned_constant_i16(i8* align 4 %x) {
|
||||||
; ALL-LABEL: @memcmp_4bytes_unaligned_constant_i16(
|
; LE-LABEL: @memcmp_4bytes_unaligned_constant_i16(
|
||||||
; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* bitcast ([4 x i16]* @intbuf_unaligned to i8*), i8* %x, i64 4)
|
; LE-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i32*
|
||||||
; ALL-NEXT: [[CMPEQ0:%.*]] = icmp eq i32 [[CALL]], 0
|
; LE-NEXT: [[RHSV:%.*]] = load i32, i32* [[TMP1]], align 4
|
||||||
; ALL-NEXT: ret i1 [[CMPEQ0]]
|
; LE-NEXT: [[TMP2:%.*]] = icmp eq i32 [[RHSV]], 131073
|
||||||
|
; LE-NEXT: ret i1 [[TMP2]]
|
||||||
|
;
|
||||||
|
; BE-LABEL: @memcmp_4bytes_unaligned_constant_i16(
|
||||||
|
; BE-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i32*
|
||||||
|
; BE-NEXT: [[RHSV:%.*]] = load i32, i32* [[TMP1]], align 4
|
||||||
|
; BE-NEXT: [[TMP2:%.*]] = icmp eq i32 [[RHSV]], 65538
|
||||||
|
; BE-NEXT: ret i1 [[TMP2]]
|
||||||
;
|
;
|
||||||
%call = tail call i32 @memcmp(i8* bitcast (i16* getelementptr inbounds ([4 x i16], [4 x i16]* @intbuf_unaligned, i64 0, i64 0) to i8*), i8* %x, i64 4)
|
%call = tail call i32 @memcmp(i8* bitcast (i16* getelementptr inbounds ([4 x i16], [4 x i16]* @intbuf_unaligned, i64 0, i64 0) to i8*), i8* %x, i64 4)
|
||||||
%cmpeq0 = icmp eq i32 %call, 0
|
%cmpeq0 = icmp eq i32 %call, 0
|
||||||
|
@ -49,3 +63,18 @@ define i1 @memcmp_3bytes_aligned_constant_i32(i8* align 4 %x) {
|
||||||
ret i1 %cmpeq0
|
ret i1 %cmpeq0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; A sloppy implementation would infinite loop by recreating the unused instructions.
|
||||||
|
|
||||||
|
define i1 @memcmp_4bytes_one_unaligned_i8(i8* align 4 %x, i8* align 1 %y) {
|
||||||
|
; ALL-LABEL: @memcmp_4bytes_one_unaligned_i8(
|
||||||
|
; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
|
||||||
|
; ALL-NEXT: [[CMPEQ0:%.*]] = icmp eq i32 [[CALL]], 0
|
||||||
|
; ALL-NEXT: ret i1 [[CMPEQ0]]
|
||||||
|
;
|
||||||
|
%bc = bitcast i8* %x to i32*
|
||||||
|
%lhsv = load i32, i32* %bc
|
||||||
|
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
|
||||||
|
%cmpeq0 = icmp eq i32 %call, 0
|
||||||
|
ret i1 %cmpeq0
|
||||||
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue