[Reassociate] Don't convert add-like-or's into add's if they appear to be part of a load-combining idiom

As Wei Mi reported in post-commit review
  https://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20201116/853479.html
teaching -reassociate about add-like-or's (70472f3) results in breaking apart
load-widening patterns and reassociating them.

For now, simply exclude any such `or` that appears to be the root of a
load-widening idiom from the or->add transformation.

Note that the heuristic is greedy: it does not ensure that the loads
can *actually* be widened into a single load.
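
For reference, here is a minimal hand-written sketch (not taken from the patch;
the function and value names are made up) of the kind of load-widening idiom
the new isLoadCombineCandidate() check is meant to leave alone. The root `or`
assembles an i16 from two adjacent i8 loads, and after this change -reassociate
keeps that `or` instead of turning it into an `add`:

  define i16 @widen_two_bytes(i8* %ptr) {
    %p1 = getelementptr inbounds i8, i8* %ptr, i64 1
    %lo = load i8, i8* %ptr, align 1
    %hi = load i8, i8* %p1, align 1
    %lo.wide = zext i8 %lo to i16
    %hi.wide = zext i8 %hi to i16
    %hi.shifted = shl i16 %hi.wide, 8
    %word = or i16 %hi.shifted, %lo.wide  ; root of the idiom; no longer rewritten into `add`
    ret i16 %word
  }

Because the check is greedy, a structurally identical `or` whose loads come
from unrelated pointers would also be accepted, even though no single widened
load could actually replace it.
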
Roman Lebedev 2020-11-18 17:21:04 +03:00
parent 1a3428163d
commit 34ff90ad5d
2 changed files with 66 additions and 8 deletions

@@ -920,8 +920,66 @@ static Value *NegateValue(Value *V, Instruction *BI,
   return NewNeg;
 }
 
+// See if this `or` looks like a load widening reduction, i.e. that it
+// consists of `or`/`shl`/`zext`/`load` nodes only. Note that we don't
+// ensure that the pattern is *really* a load widening reduction;
+// we do not ensure that it can really be replaced with a widened load,
+// only that it mostly looks like one.
+static bool isLoadCombineCandidate(Instruction *Or) {
+  SmallVector<Instruction *, 8> Worklist;
+  SmallSet<Instruction *, 8> Visited;
+
+  auto Enqueue = [&](Value *V) {
+    auto *I = dyn_cast<Instruction>(V);
+    // Each node of an `or` reduction must be an instruction.
+    if (!I)
+      return false; // Node is certainly not part of an `or` load reduction.
+    // Only process instructions we have never processed before.
+    if (Visited.insert(I).second)
+      Worklist.emplace_back(I);
+    return true; // Will need to look at parent nodes.
+  };
+  if (!Enqueue(Or))
+    return false; // Not an `or` reduction pattern.
+
+  while (!Worklist.empty()) {
+    auto *I = Worklist.pop_back_val();
+    // Okay, which instruction is this node?
+    switch (I->getOpcode()) {
+    case Instruction::Or:
+      // Got an `or` node. That's fine, just recurse into its operands.
+      for (Value *Op : I->operands())
+        if (!Enqueue(Op))
+          return false; // Not an `or` reduction pattern.
+      continue;
+    case Instruction::Shl:
+    case Instruction::ZExt:
+      // `shl`/`zext` nodes are fine, just recurse into their base operand.
+      if (!Enqueue(I->getOperand(0)))
+        return false; // Not an `or` reduction pattern.
+      continue;
+    case Instruction::Load:
+      // Perfect, a `load` node means we've reached an edge of the graph.
+      continue;
+    default: // Unknown node.
+      return false; // Not an `or` reduction pattern.
+    }
+  }
+
+  return true;
+}
+
 /// Return true if it may be profitable to convert this (X|Y) into (X+Y).
 static bool ShouldConvertOrWithNoCommonBitsToAdd(Instruction *Or) {
+  // If this `or` appears to be part of a load widening reduction, ignore it.
+  if (isLoadCombineCandidate(Or))
+    return false;
+
   // Don't bother to convert this up unless either the LHS is an associable add
   // or subtract or mul or if this is only used by one of the above.
   // This is only a compile-time improvement, it is not needed for correctness!

@@ -10,8 +10,8 @@ define i16 @p0_i8_i8_i16(i8* %ptr) {
 ; CHECK-NEXT: [[I4:%.*]] = shl i16 [[I3]], 8
 ; CHECK-NEXT: [[I5:%.*]] = load i8, i8* [[PTR]], align 1
 ; CHECK-NEXT: [[I6:%.*]] = zext i8 [[I5]] to i16
-; CHECK-NEXT: [[I7:%.*]] = add i16 [[I6]], 42
-; CHECK-NEXT: [[I8:%.*]] = add i16 [[I7]], [[I4]]
+; CHECK-NEXT: [[I7:%.*]] = or i16 [[I4]], [[I6]]
+; CHECK-NEXT: [[I8:%.*]] = add i16 [[I7]], 42
 ; CHECK-NEXT: ret i16 [[I8]]
 ;
   %i = getelementptr inbounds i8, i8* %ptr, i64 1
@@ -34,8 +34,8 @@ define i16 @p1_i8_i8_i16_swapped(i8* %ptr) {
 ; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 1
 ; CHECK-NEXT: [[I5:%.*]] = load i8, i8* [[I4]], align 1
 ; CHECK-NEXT: [[I6:%.*]] = zext i8 [[I5]] to i16
-; CHECK-NEXT: [[I7:%.*]] = add i16 [[I6]], 42
-; CHECK-NEXT: [[I8:%.*]] = add i16 [[I7]], [[I3]]
+; CHECK-NEXT: [[I7:%.*]] = or i16 [[I3]], [[I6]]
+; CHECK-NEXT: [[I8:%.*]] = add i16 [[I7]], 42
 ; CHECK-NEXT: ret i16 [[I8]]
 ;
   %i = load i8, i8* %ptr
@@ -58,8 +58,8 @@ define i16 @p2(i8* %ptr) {
 ; CHECK-NEXT: [[I4:%.*]] = shl i16 [[I3]], 9
 ; CHECK-NEXT: [[I5:%.*]] = load i8, i8* [[PTR]], align 1
 ; CHECK-NEXT: [[I6:%.*]] = zext i8 [[I5]] to i16
-; CHECK-NEXT: [[I7:%.*]] = add i16 [[I6]], 42
-; CHECK-NEXT: [[I8:%.*]] = add i16 [[I7]], [[I4]]
+; CHECK-NEXT: [[I7:%.*]] = or i16 [[I4]], [[I6]]
+; CHECK-NEXT: [[I8:%.*]] = add i16 [[I7]], 42
 ; CHECK-NEXT: ret i16 [[I8]]
 ;
   %i = getelementptr inbounds i8, i8* %ptr, i64 1
@@ -79,8 +79,8 @@ define i16 @p3(i8* %ptr) {
 ; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[PTR:%.*]], align 1
 ; CHECK-NEXT: [[I2:%.*]] = zext i8 [[I]] to i16
 ; CHECK-NEXT: [[I3:%.*]] = shl i16 [[I2]], 8
-; CHECK-NEXT: [[I4:%.*]] = add i16 [[I2]], 42
-; CHECK-NEXT: [[I5:%.*]] = add i16 [[I4]], [[I3]]
+; CHECK-NEXT: [[I4:%.*]] = or i16 [[I3]], [[I2]]
+; CHECK-NEXT: [[I5:%.*]] = add i16 [[I4]], 42
 ; CHECK-NEXT: ret i16 [[I5]]
 ;
   %i = load i8, i8* %ptr