forked from OSchip/llvm-project
[Reassociate] Don't convert add-like-or's into add's if they appear to be part of load-combining idiom
As Wei Mi is reporting in post-commit review
https://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20201116/853479.html
teaching -reassociate about add-like-or's (70472f3
) results in breaking apart
load widening patterns, and reassociating them.
For now, simply exclude any such `or` that appears to be a root of
load widening idiom from the or->add transformation.
Note that the heuristic is greedy, it doesn't ensure that loads
can *actually* be widened into a single load.
This commit is contained in:
parent
1a3428163d
commit
34ff90ad5d
|
@ -920,8 +920,66 @@ static Value *NegateValue(Value *V, Instruction *BI,
|
|||
return NewNeg;
|
||||
}
|
||||
|
||||
// See if this `or` looks like an load widening reduction, i.e. that it
|
||||
// consists of an `or`/`shl`/`zext`/`load` nodes only. Note that we don't
|
||||
// ensure that the pattern is *really* a load widening reduction,
|
||||
// we do not ensure that it can really be replaced with a widened load,
|
||||
// only that it mostly looks like one.
|
||||
static bool isLoadCombineCandidate(Instruction *Or) {
|
||||
SmallVector<Instruction *, 8> Worklist;
|
||||
SmallSet<Instruction *, 8> Visited;
|
||||
|
||||
auto Enqueue = [&](Value *V) {
|
||||
auto *I = dyn_cast<Instruction>(V);
|
||||
// Each node of an `or` reduction must be an instruction,
|
||||
if (!I)
|
||||
return false; // Node is certainly not part of an `or` load reduction.
|
||||
// Only process instructions we have never processed before.
|
||||
if (Visited.insert(I).second)
|
||||
Worklist.emplace_back(I);
|
||||
return true; // Will need to look at parent nodes.
|
||||
};
|
||||
|
||||
if (!Enqueue(Or))
|
||||
return false; // Not an `or` reduction pattern.
|
||||
|
||||
while (!Worklist.empty()) {
|
||||
auto *I = Worklist.pop_back_val();
|
||||
|
||||
// Okay, which instruction is this node?
|
||||
switch (I->getOpcode()) {
|
||||
case Instruction::Or:
|
||||
// Got an `or` node. That's fine, just recurse into it's operands.
|
||||
for (Value *Op : I->operands())
|
||||
if (!Enqueue(Op))
|
||||
return false; // Not an `or` reduction pattern.
|
||||
continue;
|
||||
|
||||
case Instruction::Shl:
|
||||
case Instruction::ZExt:
|
||||
// `shl`/`zext` nodes are fine, just recurse into their base operand.
|
||||
if (!Enqueue(I->getOperand(0)))
|
||||
return false; // Not an `or` reduction pattern.
|
||||
continue;
|
||||
|
||||
case Instruction::Load:
|
||||
// Perfect, `load` node means we've reached an edge of the graph.
|
||||
continue;
|
||||
|
||||
default: // Unknown node.
|
||||
return false; // Not an `or` reduction pattern.
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Return true if it may be profitable to convert this (X|Y) into (X+Y).
|
||||
static bool ShouldConvertOrWithNoCommonBitsToAdd(Instruction *Or) {
|
||||
// If this `or` appears to be a part of an load widening reduction, ignore it.
|
||||
if (isLoadCombineCandidate(Or))
|
||||
return false;
|
||||
|
||||
// Don't bother to convert this up unless either the LHS is an associable add
|
||||
// or subtract or mul or if this is only used by one of the above.
|
||||
// This is only a compile-time improvement, it is not needed for correctness!
|
||||
|
|
|
@ -10,8 +10,8 @@ define i16 @p0_i8_i8_i16(i8* %ptr) {
|
|||
; CHECK-NEXT: [[I4:%.*]] = shl i16 [[I3]], 8
|
||||
; CHECK-NEXT: [[I5:%.*]] = load i8, i8* [[PTR]], align 1
|
||||
; CHECK-NEXT: [[I6:%.*]] = zext i8 [[I5]] to i16
|
||||
; CHECK-NEXT: [[I7:%.*]] = add i16 [[I6]], 42
|
||||
; CHECK-NEXT: [[I8:%.*]] = add i16 [[I7]], [[I4]]
|
||||
; CHECK-NEXT: [[I7:%.*]] = or i16 [[I4]], [[I6]]
|
||||
; CHECK-NEXT: [[I8:%.*]] = add i16 [[I7]], 42
|
||||
; CHECK-NEXT: ret i16 [[I8]]
|
||||
;
|
||||
%i = getelementptr inbounds i8, i8* %ptr, i64 1
|
||||
|
@ -34,8 +34,8 @@ define i16 @p1_i8_i8_i16_swapped(i8* %ptr) {
|
|||
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 1
|
||||
; CHECK-NEXT: [[I5:%.*]] = load i8, i8* [[I4]], align 1
|
||||
; CHECK-NEXT: [[I6:%.*]] = zext i8 [[I5]] to i16
|
||||
; CHECK-NEXT: [[I7:%.*]] = add i16 [[I6]], 42
|
||||
; CHECK-NEXT: [[I8:%.*]] = add i16 [[I7]], [[I3]]
|
||||
; CHECK-NEXT: [[I7:%.*]] = or i16 [[I3]], [[I6]]
|
||||
; CHECK-NEXT: [[I8:%.*]] = add i16 [[I7]], 42
|
||||
; CHECK-NEXT: ret i16 [[I8]]
|
||||
;
|
||||
%i = load i8, i8* %ptr
|
||||
|
@ -58,8 +58,8 @@ define i16 @p2(i8* %ptr) {
|
|||
; CHECK-NEXT: [[I4:%.*]] = shl i16 [[I3]], 9
|
||||
; CHECK-NEXT: [[I5:%.*]] = load i8, i8* [[PTR]], align 1
|
||||
; CHECK-NEXT: [[I6:%.*]] = zext i8 [[I5]] to i16
|
||||
; CHECK-NEXT: [[I7:%.*]] = add i16 [[I6]], 42
|
||||
; CHECK-NEXT: [[I8:%.*]] = add i16 [[I7]], [[I4]]
|
||||
; CHECK-NEXT: [[I7:%.*]] = or i16 [[I4]], [[I6]]
|
||||
; CHECK-NEXT: [[I8:%.*]] = add i16 [[I7]], 42
|
||||
; CHECK-NEXT: ret i16 [[I8]]
|
||||
;
|
||||
%i = getelementptr inbounds i8, i8* %ptr, i64 1
|
||||
|
@ -79,8 +79,8 @@ define i16 @p3(i8* %ptr) {
|
|||
; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[PTR:%.*]], align 1
|
||||
; CHECK-NEXT: [[I2:%.*]] = zext i8 [[I]] to i16
|
||||
; CHECK-NEXT: [[I3:%.*]] = shl i16 [[I2]], 8
|
||||
; CHECK-NEXT: [[I4:%.*]] = add i16 [[I2]], 42
|
||||
; CHECK-NEXT: [[I5:%.*]] = add i16 [[I4]], [[I3]]
|
||||
; CHECK-NEXT: [[I4:%.*]] = or i16 [[I3]], [[I2]]
|
||||
; CHECK-NEXT: [[I5:%.*]] = add i16 [[I4]], 42
|
||||
; CHECK-NEXT: ret i16 [[I5]]
|
||||
;
|
||||
%i = load i8, i8* %ptr
|
||||
|
|
Loading…
Reference in New Issue