[ARM] Allow pointer values in ARMCodeGenPrepare

Add pointers to the list of allowed types, but don't try to promote
them. Also fix a bug in the promotion of undef values: a new undef
value of the extended type is now created instead of mutating the
existing one in place. We also now only promote if there's an
instruction in the use-def chains other than the icmp, sinks and sources.

Differential Revision: https://reviews.llvm.org/D50054

llvm-svn: 339754
This commit is contained in:
Sam Parker 2018-08-15 07:52:35 +00:00
parent 5a10d127b9
commit 7def86bbdb
4 changed files with 327 additions and 196 deletions

View File

@ -127,7 +127,9 @@ static bool isSigned(Value *V) {
static bool isSupportedType(Value *V) {
LLVM_DEBUG(dbgs() << "ARM CGP: isSupportedType: " << *V << "\n");
Type *Ty = V->getType();
if (Ty->isVoidTy())
// Allow voids and pointers, these won't be promoted.
if (Ty->isVoidTy() || Ty->isPointerTy())
return true;
if (auto *Ld = dyn_cast<LoadInst>(V))
@ -150,6 +152,8 @@ static bool isSupportedType(Value *V) {
/// Many arguments will have the zeroext attribute too, so those would be free
/// too.
static bool isSource(Value *V) {
if (!isa<IntegerType>(V->getType()))
return false;
// TODO Allow truncs and zext to be sources.
if (isa<Argument>(V))
return true;
@ -222,8 +226,10 @@ static bool isSafeOverflow(Instruction *I) {
}
static bool shouldPromote(Value *V) {
if (!isa<IntegerType>(V->getType()) || isSink(V))
if (!isa<IntegerType>(V->getType()) || isSink(V)) {
LLVM_DEBUG(dbgs() << "ARM CGP: Don't need to promote: " << *V << "\n");
return false;
}
if (isSource(V))
return true;
@ -369,21 +375,19 @@ void IRPromoter::Mutate(Type *OrigTy,
if (Leaves.count(V))
continue;
if (!isa<Instruction>(V))
continue;
auto *I = cast<Instruction>(V);
if (Roots.count(I))
continue;
for (auto &U : I->operands()) {
if ((U->getType() == ExtTy) || !isSupportedType(&*U))
for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
Value *Op = I->getOperand(i);
if ((Op->getType() == ExtTy) || !isa<IntegerType>(Op->getType()))
continue;
if (auto *Const = dyn_cast<ConstantInt>(&*U))
if (auto *Const = dyn_cast<ConstantInt>(Op))
FixConst(Const, I);
else if (isa<UndefValue>(&*U))
U->mutateType(ExtTy);
else if (isa<UndefValue>(Op))
I->setOperand(i, UndefValue::get(ExtTy));
}
if (shouldPromote(I)) {
@ -398,9 +402,6 @@ void IRPromoter::Mutate(Type *OrigTy,
if (Leaves.count(V))
continue;
if (!isa<Instruction>(V))
continue;
if (!shouldPromote(V) || isPromotedResultSafe(V))
continue;
@ -424,6 +425,9 @@ void IRPromoter::Mutate(Type *OrigTy,
for (unsigned i = 0; i < I->getNumOperands(); ++i) {
Value *V = I->getOperand(i);
if (!isa<IntegerType>(V->getType()))
continue;
if (Promoted.count(V) || NewInsts.count(V)) {
if (auto *Op = dyn_cast<Instruction>(V)) {
@ -466,7 +470,7 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
return true;
// Non-instruction values that we can handle.
if (isa<ConstantInt>(V) || isa<Argument>(V))
if ((isa<Constant>(V) && !isa<ConstantExpr>(V)) || isa<Argument>(V))
return isSupportedType(V);
if (isa<PHINode>(V) || isa<SelectInst>(V) || isa<ReturnInst>(V) ||
@ -558,10 +562,6 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
if (CurrentVisited.count(V))
return true;
// Ignore pointer values that aren't instructions.
if (!isa<Instruction>(V) && isa<PointerType>(V->getType()))
return true;
if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) {
LLVM_DEBUG(dbgs() << "ARM CGP: Can't handle: " << *V << "\n");
return false;
@ -578,6 +578,7 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
if (CurrentVisited.count(V))
continue;
// Ignore non-instructions, other than arguments.
if (!isa<Instruction>(V) && !isSource(V))
continue;
@ -620,6 +621,17 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
for (auto *I : CurrentVisited)
I->dump();
);
unsigned ToPromote = 0;
for (auto *V : CurrentVisited) {
if (Leaves.count(V))
continue;
if (Roots.count(cast<Instruction>(V)))
continue;
++ToPromote;
}
if (ToPromote < 2)
return false;
Promoter->Mutate(OrigTy, CurrentVisited, Leaves, Roots);
return true;

View File

@ -1,148 +1,5 @@
; RUN: llc -mtriple=thumbv7m -arm-disable-cgp=false %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP
; RUN: llc -mtriple=thumbv8m.main -arm-disable-cgp=false %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP
; RUN: llc -mtriple=thumbv8m.main -arm-disable-cgp=false -arm-enable-scalar-dsp=true -mcpu=cortex-m33 %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP
; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM
; Test that ARMCodeGenPrepare can handle:
; - loops
; - call operands
; - call return values
; - ret instructions
; We use nuw on the arithmetic instructions to avoid complications.
; Check that the arguments are extended but then nothing else is.
; This also ensures that the pass can handle loops.
; The i8 value travels from the plain (non-zeroext) arguments through the
; preheader phi into a loop-carried phi that is updated with 'sub nuw' on one
; path and 'shl nuw' on the other.
; CHECK-COMMON-LABEL: phi_feeding_phi_args
; CHECK-COMMON: uxtb
; CHECK-COMMON: uxtb
; CHECK-NOT: uxtb
define void @phi_feeding_phi_args(i8 %a, i8 %b) {
entry:
%0 = icmp ugt i8 %a, %b
br i1 %0, label %preheader, label %empty
empty:
br label %preheader
preheader:
; Merge of the two arguments; this seeds the loop-carried phi below.
%1 = phi i8 [ %a, %entry ], [ %b, %empty ]
br label %loop
loop:
%val = phi i8 [ %1, %preheader ], [ %inc2, %if.end ]
%cmp = icmp ult i8 %val, 254
br i1 %cmp, label %if.then, label %if.else
if.then:
%inc = sub nuw i8 %val, 2
br label %if.end
if.else:
%inc1 = shl nuw i8 %val, 1
br label %if.end
if.end:
; Second phi, fed by both arithmetic paths and fed back into %val.
%inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ]
%cmp1 = icmp eq i8 %inc2, 255
br i1 %cmp1, label %exit, label %loop
exit:
ret void
}
; Same as above, but as the args are zeroext, we shouldn't see any uxts.
; The body matches phi_feeding_phi_args; only the zeroext attribute on %a and
; %b differs.
; CHECK-COMMON-LABEL: phi_feeding_phi_zeroext_args
; CHECK-COMMON-NOT: uxt
define void @phi_feeding_phi_zeroext_args(i8 zeroext %a, i8 zeroext %b) {
entry:
%0 = icmp ugt i8 %a, %b
br i1 %0, label %preheader, label %empty
empty:
br label %preheader
preheader:
; Merge of the two zeroext arguments; this seeds the loop-carried phi below.
%1 = phi i8 [ %a, %entry ], [ %b, %empty ]
br label %loop
loop:
%val = phi i8 [ %1, %preheader ], [ %inc2, %if.end ]
%cmp = icmp ult i8 %val, 254
br i1 %cmp, label %if.then, label %if.else
if.then:
%inc = sub nuw i8 %val, 2
br label %if.end
if.else:
%inc1 = shl nuw i8 %val, 1
br label %if.end
if.end:
; Second phi, fed by both arithmetic paths and fed back into %val.
%inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ]
%cmp1 = icmp eq i8 %inc2, 255
br i1 %cmp1, label %exit, label %loop
exit:
ret void
}
; Just check that phis also work with i16s.
; The loop-carried i16 phi starts from the constant 0 and is bumped by 1 or 2
; with 'add nuw'; the function takes no arguments and returns nothing.
; CHECK-COMMON-LABEL: phi_i16:
; CHECK-COMMON-NOT: uxt
define void @phi_i16() {
entry:
br label %loop
loop:
%val = phi i16 [ 0, %entry ], [ %inc2, %if.end ]
%cmp = icmp ult i16 %val, 128
br i1 %cmp, label %if.then, label %if.else
if.then:
%inc = add nuw i16 %val, 2
br label %if.end
if.else:
%inc1 = add nuw i16 %val, 1
br label %if.end
if.end:
; Merge of both increments; compared against 253 to decide whether to loop.
%inc2 = phi i16 [ %inc, %if.then], [ %inc1, %if.else ]
%cmp1 = icmp ult i16 %inc2, 253
br i1 %cmp1, label %loop, label %exit
exit:
ret void
}
; An i8 loop-carried phi seeded with the constant 0 is incremented with
; 'add nuw' and the merged value is returned from the exit block, so the chain
; ends in a ret instruction. No uxt is expected in the output.
; CHECK-COMMON-LABEL: ret_i8
; CHECK-COMMON-NOT: uxt
define i8 @ret_i8() {
entry:
br label %loop
loop:
%val = phi i8 [ 0, %entry ], [ %inc2, %if.end ]
%cmp = icmp ult i8 %val, 128
br i1 %cmp, label %if.then, label %if.else
if.then:
%inc = add nuw i8 %val, 2
br label %if.end
if.else:
%inc1 = add nuw i8 %val, 1
br label %if.end
if.end:
; Merge of both increments; this is also the value returned at %exit.
%inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ]
%cmp1 = icmp ult i8 %inc2, 253
br i1 %cmp1, label %exit, label %loop
exit:
ret i8 %inc2
}
; RUN: llc -mtriple=thumbv8 -arm-disable-cgp=false %s -o - | FileCheck %s
; RUN: llc -mtriple=armv8 -arm-disable-cgp=false %s -o - | FileCheck %s
; Check that the pass doesn't try to promote the immediate parameters.
; CHECK-COMMON-LABEL: call_with_imms
@ -232,32 +89,12 @@ entry:
ret i32 undef
}
; Transform will bail because of the zext
; Check that d.sroa.0.0.be is promoted and passed directly into the tail call.
; CHECK-COMMON-LABEL: check_zext_phi_call_arg
; CHECK-COMMON: uxt
; An i16 phi seeded with 30 is compared against 0; on the non-zero path it is
; zero-extended to i32 and passed to a tail call of @f (called through a
; bitcast of its varargs type). The call's zeroext i16 result is merged with 0
; in the backedge phi and fed back into the loop phi. The function has no
; return; every path branches back to %for.cond.
define i32 @check_zext_phi_call_arg() {
entry:
br label %for.cond
for.cond: ; preds = %for.cond.backedge, %entry
%d.sroa.0.0 = phi i16 [ 30, %entry ], [ %d.sroa.0.0.be, %for.cond.backedge ]
%tobool = icmp eq i16 %d.sroa.0.0, 0
br i1 %tobool, label %for.cond.backedge, label %if.then
for.cond.backedge: ; preds = %for.cond, %if.then
; Either the call result or the constant 0, depending on the path taken.
%d.sroa.0.0.be = phi i16 [ %call, %if.then ], [ 0, %for.cond ]
br label %for.cond
if.then: ; preds = %for.cond
; Explicit zext of the narrow phi value, used as the i32 call argument.
%d.sroa.0.0.insert.ext = zext i16 %d.sroa.0.0 to i32
%call = tail call zeroext i16 bitcast (i16 (...)* @f to i16 (i32)*)(i32 %d.sroa.0.0.insert.ext) #2
br label %for.cond.backedge
}
; The call to safe_lshift_func takes two parameters, but they're the same value just one is zext.
; The transform won't happen because of the zext.
; CHECK-COMMON-LABEL: call_zext_i8_i32
; CHECK-COMMON-NOT: uxt
; CHECK-COMMON: cmp
; CHECK-COMMON: uxtb
define fastcc i32 @call_zext_i8_i32(i32 %p_45, i8 zeroext %p_46) {
for.cond8.preheader:
%call217 = call fastcc zeroext i8 @safe_mul_func_uint8_t_u_u(i8 zeroext undef)
@ -281,7 +118,7 @@ for.end411: ; preds = %for.cond8.preheader
@g_893 = hidden local_unnamed_addr global %struct.anon zeroinitializer, align 4
@g_82 = hidden local_unnamed_addr global i32 0, align 4
; Test that the transform bails on finding a call which returns a i16**
; Test that the transform bails on finding %conv4, a trunc
; CHECK-COMMON-LABEL: call_return_pointer
; CHECK-COMMON: sxth
; CHECK-COMMON-NOT: uxt
@ -308,16 +145,40 @@ if.then: ; preds = %for.cond
br label %for.cond.backedge
}
; Transform will bail because of the zext
; Check that d.sroa.0.0.be is promoted and passed directly into the tail call.
; CHECK-COMMON-LABEL: check_zext_phi_call_arg
; CHECK-COMMON: uxt
; CHECK-COMMON: uxt
; An i16 phi seeded with 30 is compared against 0; on the non-zero path it is
; zero-extended to i32 and passed to a tail call of @f (called through a
; bitcast of its varargs type). The call's zeroext i16 result is merged with 0
; in the backedge phi and fed back into the loop phi. The function has no
; return; every path branches back to %for.cond.
define i32 @check_zext_phi_call_arg() {
entry:
br label %for.cond
for.cond: ; preds = %for.cond.backedge, %entry
%d.sroa.0.0 = phi i16 [ 30, %entry ], [ %d.sroa.0.0.be, %for.cond.backedge ]
%tobool = icmp eq i16 %d.sroa.0.0, 0
br i1 %tobool, label %for.cond.backedge, label %if.then
for.cond.backedge: ; preds = %for.cond, %if.then
; Either the call result or the constant 0, depending on the path taken.
%d.sroa.0.0.be = phi i16 [ %call, %if.then ], [ 0, %for.cond ]
br label %for.cond
if.then: ; preds = %for.cond
; Explicit zext of the narrow phi value, used as the i32 call argument.
%d.sroa.0.0.insert.ext = zext i16 %d.sroa.0.0 to i32
%call = tail call zeroext i16 bitcast (i16 (...)* @f to i16 (i32)*)(i32 %d.sroa.0.0.insert.ext) #2
br label %for.cond.backedge
}
declare i32 @assert(...)
declare i8 @dummy_i8(i8)
declare i8 @dummy2(i8*, i8, i8)
declare i16 @dummy3(i16)
declare dso_local i32 @e(...) local_unnamed_addr #1
declare dso_local zeroext i16 @f(...) local_unnamed_addr #1
declare noalias i16** @func_62(i8 zeroext %p_63, i32 %p_64, i16 signext %p_65, i32* nocapture readnone %p_66)
declare fastcc signext i16 @safe_sub_func_int16_t_s_s(i16 signext %si2)
declare dso_local fastcc i64 @safe_sub_func_int64_t_s_s(i64, i64)
declare dso_local fastcc zeroext i8 @safe_lshift_func(i8 zeroext, i32)
declare dso_local fastcc zeroext i8 @safe_mul_func_uint8_t_u_u(i8 returned zeroext)
declare dso_local i32 @e(...) local_unnamed_addr #1
declare dso_local zeroext i16 @f(...) local_unnamed_addr #1
declare i8 @dummy_i8(i8)
declare i8 @dummy2(i8*, i8, i8)
declare i16 @dummy3(i16)
declare i32 @assert(...)

View File

@ -0,0 +1,174 @@
; RUN: llc -mtriple=thumbv7m -arm-disable-cgp=false %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP
; RUN: llc -mtriple=thumbv8m.main -arm-disable-cgp=false %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP
; RUN: llc -mtriple=thumbv8m.main -arm-disable-cgp=false -arm-enable-scalar-dsp=true -mcpu=cortex-m33 %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP
; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM
; Test that ARMCodeGenPrepare can handle:
; - loops
; - call operands
; - call return values
; - ret instructions
; We use nuw on the arithmetic instructions to avoid complications.
; Check that the arguments are extended but then nothing else is.
; This also ensures that the pass can handle loops.
; The i8 value travels from the plain (non-zeroext) arguments through the
; preheader phi into a loop-carried phi that is updated with 'sub nuw' on one
; path and 'shl nuw' on the other.
; NOTE(review): the last directive below uses the bare CHECK prefix, while the
; FileCheck invocations at the top of this file only enable the CHECK-COMMON,
; CHECK-NODSP, CHECK-DSP and CHECK-DSP-IMM prefixes, so it looks like it is
; never evaluated - confirm whether the CHECK-COMMON form was intended.
; CHECK-COMMON-LABEL: phi_feeding_phi_args
; CHECK-COMMON: uxtb
; CHECK-COMMON: uxtb
; CHECK-NOT: uxtb
define void @phi_feeding_phi_args(i8 %a, i8 %b) {
entry:
%0 = icmp ugt i8 %a, %b
br i1 %0, label %preheader, label %empty
empty:
br label %preheader
preheader:
; Merge of the two arguments; this seeds the loop-carried phi below.
%1 = phi i8 [ %a, %entry ], [ %b, %empty ]
br label %loop
loop:
%val = phi i8 [ %1, %preheader ], [ %inc2, %if.end ]
%cmp = icmp ult i8 %val, 254
br i1 %cmp, label %if.then, label %if.else
if.then:
%inc = sub nuw i8 %val, 2
br label %if.end
if.else:
%inc1 = shl nuw i8 %val, 1
br label %if.end
if.end:
; Second phi, fed by both arithmetic paths and fed back into %val.
%inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ]
%cmp1 = icmp eq i8 %inc2, 255
br i1 %cmp1, label %exit, label %loop
exit:
ret void
}
; Same as above, but as the args are zeroext, we shouldn't see any uxts.
; The body matches phi_feeding_phi_args; only the zeroext attribute on %a and
; %b differs.
; CHECK-COMMON-LABEL: phi_feeding_phi_zeroext_args
; CHECK-COMMON-NOT: uxt
define void @phi_feeding_phi_zeroext_args(i8 zeroext %a, i8 zeroext %b) {
entry:
%0 = icmp ugt i8 %a, %b
br i1 %0, label %preheader, label %empty
empty:
br label %preheader
preheader:
; Merge of the two zeroext arguments; this seeds the loop-carried phi below.
%1 = phi i8 [ %a, %entry ], [ %b, %empty ]
br label %loop
loop:
%val = phi i8 [ %1, %preheader ], [ %inc2, %if.end ]
%cmp = icmp ult i8 %val, 254
br i1 %cmp, label %if.then, label %if.else
if.then:
%inc = sub nuw i8 %val, 2
br label %if.end
if.else:
%inc1 = shl nuw i8 %val, 1
br label %if.end
if.end:
; Second phi, fed by both arithmetic paths and fed back into %val.
%inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ]
%cmp1 = icmp eq i8 %inc2, 255
br i1 %cmp1, label %exit, label %loop
exit:
ret void
}
; Just check that phis also work with i16s.
; The loop-carried i16 phi starts from the constant 0 and is bumped by 1 or 2
; with 'add nuw'; the function takes no arguments and returns nothing.
; CHECK-COMMON-LABEL: phi_i16:
; CHECK-COMMON-NOT: uxt
define void @phi_i16() {
entry:
br label %loop
loop:
%val = phi i16 [ 0, %entry ], [ %inc2, %if.end ]
%cmp = icmp ult i16 %val, 128
br i1 %cmp, label %if.then, label %if.else
if.then:
%inc = add nuw i16 %val, 2
br label %if.end
if.else:
%inc1 = add nuw i16 %val, 1
br label %if.end
if.end:
; Merge of both increments; compared against 253 to decide whether to loop.
%inc2 = phi i16 [ %inc, %if.then], [ %inc1, %if.else ]
%cmp1 = icmp ult i16 %inc2, 253
br i1 %cmp1, label %loop, label %exit
exit:
ret void
}
; An i8 loop-carried phi seeded with the constant 0 is incremented with
; 'add nuw' and the merged value is returned from the exit block, so the chain
; ends in a ret instruction. No uxt is expected in the output.
; CHECK-COMMON-LABEL: ret_i8
; CHECK-COMMON-NOT: uxt
define i8 @ret_i8() {
entry:
br label %loop
loop:
%val = phi i8 [ 0, %entry ], [ %inc2, %if.end ]
%cmp = icmp ult i8 %val, 128
br i1 %cmp, label %if.then, label %if.else
if.then:
%inc = add nuw i8 %val, 2
br label %if.end
if.else:
%inc1 = add nuw i8 %val, 1
br label %if.end
if.end:
; Merge of both increments; this is also the value returned at %exit.
%inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ]
%cmp1 = icmp ult i8 %inc2, 253
br i1 %cmp1, label %exit, label %loop
exit:
ret i8 %inc2
}
; Two phis in this function take an undef incoming value: the loop-carried
; %val (undef on entry) and %unrelated, which merges undef with the zeroext
; argument and is the value returned. No uxt is expected in the output.
; NOTE(review): presumably added to cover the undef-operand handling in the
; promotion code - confirm against the pass change.
; CHECK-COMMON-LABEL: phi_multiple_undefs
; CHECK-COMMON-NOT: uxt
define i16 @phi_multiple_undefs(i16 zeroext %arg) {
entry:
br label %loop
loop:
%val = phi i16 [ undef, %entry ], [ %inc2, %if.end ]
%cmp = icmp ult i16 %val, 128
br i1 %cmp, label %if.then, label %if.else
if.then:
%inc = add nuw i16 %val, 2
br label %if.end
if.else:
%inc1 = add nuw i16 %val, 1
br label %if.end
if.end:
%inc2 = phi i16 [ %inc, %if.then], [ %inc1, %if.else ]
; Not part of the %val/%inc2 arithmetic chain; only feeds the ret below.
%unrelated = phi i16 [ undef, %if.then ], [ %arg, %if.else ]
%cmp1 = icmp ult i16 %inc2, 253
br i1 %cmp1, label %loop, label %exit
exit:
ret i16 %unrelated
}

View File

@ -0,0 +1,84 @@
; RUN: llc -mtriple=thumbv8 -arm-disable-cgp=false %s -o - | FileCheck %s
; RUN: llc -mtriple=armv8 -arm-disable-cgp=false %s -o - | FileCheck %s
; Mixes pointer-typed values with a narrow integer chain: an i8 add/and/icmp
; on the zeroext arguments selects between the two i16* arguments, and the
; loop carries both an i16* phi and an i8 index phi whose incremented value is
; used as the getelementptr index. No uxt instruction is expected.
; CHECK-LABEL: phi_pointers
; CHECK-NOT: uxt
define void @phi_pointers(i16* %a, i16* %b, i8 zeroext %M, i8 zeroext %N) {
entry:
%add = add nuw i8 %M, 1
%and = and i8 %add, 1
%cmp = icmp ugt i8 %add, %N
; Pointer-typed selects controlled by the i8 comparison.
%base = select i1 %cmp, i16* %a, i16* %b
%other = select i1 %cmp, i16* %b, i16* %b
br label %loop
loop:
%ptr = phi i16* [ %base, %entry ], [ %gep, %loop ]
%idx = phi i8 [ %and, %entry ], [ %inc, %loop ]
%load = load i16, i16* %ptr, align 2
%inc = add nuw nsw i8 %idx, 1
%gep = getelementptr inbounds i16, i16* %ptr, i8 %inc
; The loop exit condition is a pointer comparison, not an integer one.
%cond = icmp eq i16* %gep, %other
br i1 %cond, label %exit, label %loop
exit:
ret void
}
; Pointer/integer mix like phi_pointers, with constant pointer operands added:
; the pointer phi has a null incoming value from %fail (where the i8 index phi
; receives 0), and the loop header compares the pointer against undef.
; No uxt instruction is expected.
; CHECK-LABEL: phi_pointers_null
; CHECK-NOT: uxt
define void @phi_pointers_null(i16* %a, i16* %b, i8 zeroext %M, i8 zeroext %N) {
entry:
%add = add nuw i8 %M, 1
%and = and i8 %add, 1
%cmp = icmp ugt i8 %add, %N
; Pointer-typed selects controlled by the i8 comparison.
%base = select i1 %cmp, i16* %a, i16* %b
%other = select i1 %cmp, i16* %b, i16* %b
%cmp.1 = icmp eq i16* %base, %other
br i1 %cmp.1, label %fail, label %loop
fail:
br label %loop
loop:
%ptr = phi i16* [ %base, %entry ], [ null, %fail ], [ %gep, %if.then ]
%idx = phi i8 [ %and, %entry ], [ 0, %fail ], [ %inc, %if.then ]
; Pointer compare with an undef operand.
%undef = icmp eq i16* %ptr, undef
br i1 %undef, label %exit, label %if.then
if.then:
%load = load i16, i16* %ptr, align 2
%inc = add nuw nsw i8 %idx, 1
%gep = getelementptr inbounds i16, i16* %ptr, i8 %inc
%cond = icmp eq i16* %gep, %other
br i1 %cond, label %exit, label %loop
exit:
ret void
}
declare i8 @do_something_with_ptr(i8, i16*)
; An i8 or/lshr/add chain on the zeroext arguments feeds an icmp that selects
; between two i16* arguments. The selected pointer and the i8 %shr value are
; passed to a tail call whose zeroext i8 result is returned.
; No uxt instruction is expected.
; CHECK-LABEL: call_pointer
; CHECK-NOT: uxt
define i8 @call_pointer(i8 zeroext %x, i8 zeroext %y, i16* %a, i16* %b) {
%or = or i8 %x, %y
%shr = lshr i8 %or, 1
%add = add nuw i8 %shr, 2
%cmp = icmp ne i8 %add, 0
; Pointer-typed select controlled by the i8 comparison.
%ptr = select i1 %cmp, i16* %a, i16* %b
; The call takes both a narrow integer and a pointer operand.
%call = tail call zeroext i8 @do_something_with_ptr(i8 %shr, i16* %ptr)
ret i8 %call
}
; Loads an i16* through the i16** argument, then loads an i16 through that
; pointer. The loaded value goes through 'add nuw' and an unsigned compare
; against 256, which selects between the constants 128 and 255 for the return
; value. The %limit argument is not referenced in the body.
; No uxt instruction is expected.
; CHECK-LABEL: pointer_to_pointer
; CHECK-NOT: uxt
define i16 @pointer_to_pointer(i16** %arg, i16 zeroext %limit) {
entry:
; This load produces a pointer value rather than an integer.
%addr = load i16*, i16** %arg
%val = load i16, i16* %addr
%add = add nuw i16 %val, 7
%cmp = icmp ult i16 %add, 256
%res = select i1 %cmp, i16 128, i16 255
ret i16 %res
}