From 5ab555532b5aaee281bded6341e240d3bc6261df Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Mon, 9 Sep 2013 20:56:48 +0000 Subject: [PATCH] [InstCombiner] Expose opportunities to merge subtract and comparison. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several architectures use the same instruction to perform both a comparison and a subtract. The instruction selection framework does not allow to consider different basic blocks to expose such fusion opportunities. Therefore, these instructions are “merged” by CSE at MI IR level. To increase the likelihood of CSE to apply in such situation, we reorder the operands of the comparison, when they have the same complexity, so that they matches the order of the most frequent subtract. E.g., icmp A, B ... sub B, A llvm-svn: 190352 --- .../InstCombine/InstCombineCompares.cpp | 47 +++++++++++++- llvm/test/Transforms/InstCombine/icmp.ll | 65 +++++++++++++++++++ 2 files changed, 111 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 18a08725039a..29de6f7b8894 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2037,14 +2037,59 @@ static APInt DemandedBitsLHSMask(ICmpInst &I, } +/// \brief Check if the order of \p Op0 and \p Op1 as operand in an ICmpInst +/// should be swapped. +/// The descision is based on how many times these two operands are reused +/// as subtract operands and their positions in those instructions. +/// The rational is that several architectures use the same instruction for +/// both subtract and cmp, thus it is better if the order of those operands +/// match. +/// \return true if Op0 and Op1 should be swapped. +static bool swapMayExposeCSEOpportunities(const Value * Op0, + const Value * Op1) { + // Filter out pointer value as those cannot appears directly in subtract. + // FIXME: we may want to go through inttoptrs or bitcasts. + if (Op0->getType()->isPointerTy()) + return false; + // Count every uses of both Op0 and Op1 in a subtract. + // Each time Op0 is the first operand, count -1: swapping is bad, the + // subtract has already the same layout as the compare. + // Each time Op0 is the second operand, count +1: swapping is good, the + // subtract has a diffrent layout as the compare. + // At the end, if the benefit is greater than 0, Op0 should come second to + // expose more CSE opportunities. + int GlobalSwapBenefits = 0; + for (Value::const_use_iterator UI = Op0->use_begin(), UIEnd = Op0->use_end(); UI != UIEnd; ++UI) { + const BinaryOperator *BinOp = dyn_cast(*UI); + if (!BinOp || BinOp->getOpcode() != Instruction::Sub) + continue; + // If Op0 is the first argument, this is not beneficial to swap the + // arguments. + int LocalSwapBenefits = -1; + unsigned Op1Idx = 1; + if (BinOp->getOperand(Op1Idx) == Op0) { + Op1Idx = 0; + LocalSwapBenefits = 1; + } + if (BinOp->getOperand(Op1Idx) != Op1) + continue; + GlobalSwapBenefits += LocalSwapBenefits; + } + return GlobalSwapBenefits > 0; +} + Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { bool Changed = false; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + unsigned Op0Cplxity = getComplexity(Op0); + unsigned Op1Cplxity = getComplexity(Op1); /// Orders the operands of the compare so that they are listed from most /// complex to least complex. This puts constants before unary operators, /// before binary operators. - if (getComplexity(Op0) < getComplexity(Op1)) { + if (Op0Cplxity < Op1Cplxity || + (Op0Cplxity == Op1Cplxity && + swapMayExposeCSEOpportunities(Op0, Op1))) { I.swapOperands(); std::swap(Op0, Op1); Changed = true; diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index 33636c47d3dc..bd8128ffb1b0 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -1271,3 +1271,68 @@ define i1 @icmp_sub_-1_X_uge_4(i32 %X) { %cmp = icmp uge i32 %sub, 4 ret i1 %cmp } + +; CHECK-LABEL: @icmp_swap_operands_for_cse +; CHECK: [[CMP:%[a-z0-9]+]] = icmp ult i32 %X, %Y +; CHECK-NEXT: br i1 [[CMP]], label %true, label %false +; CHECK: ret i1 +define i1 @icmp_swap_operands_for_cse(i32 %X, i32 %Y) { +entry: + %sub = sub i32 %X, %Y + %cmp = icmp ugt i32 %Y, %X + br i1 %cmp, label %true, label %false +true: + %restrue = trunc i32 %sub to i1 + br label %end +false: + %shift = lshr i32 %sub, 4 + %resfalse = trunc i32 %shift to i1 + br label %end +end: + %res = phi i1 [%restrue, %true], [%resfalse, %false] + ret i1 %res +} + +; CHECK-LABEL: @icmp_swap_operands_for_cse2 +; CHECK: [[CMP:%[a-z0-9]+]] = icmp ult i32 %X, %Y +; CHECK-NEXT: br i1 [[CMP]], label %true, label %false +; CHECK: ret i1 +define i1 @icmp_swap_operands_for_cse2(i32 %X, i32 %Y) { +entry: + %cmp = icmp ugt i32 %Y, %X + br i1 %cmp, label %true, label %false +true: + %sub = sub i32 %X, %Y + %sub1 = sub i32 %X, %Y + %add = add i32 %sub, %sub1 + %restrue = trunc i32 %add to i1 + br label %end +false: + %sub2 = sub i32 %Y, %X + %resfalse = trunc i32 %sub2 to i1 + br label %end +end: + %res = phi i1 [%restrue, %true], [%resfalse, %false] + ret i1 %res +} + +; CHECK-LABEL: @icmp_do_not_swap_operands_for_cse +; CHECK: [[CMP:%[a-z0-9]+]] = icmp ugt i32 %Y, %X +; CHECK-NEXT: br i1 [[CMP]], label %true, label %false +; CHECK: ret i1 +define i1 @icmp_do_not_swap_operands_for_cse(i32 %X, i32 %Y) { +entry: + %cmp = icmp ugt i32 %Y, %X + br i1 %cmp, label %true, label %false +true: + %sub = sub i32 %X, %Y + %restrue = trunc i32 %sub to i1 + br label %end +false: + %sub2 = sub i32 %Y, %X + %resfalse = trunc i32 %sub2 to i1 + br label %end +end: + %res = phi i1 [%restrue, %true], [%resfalse, %false] + ret i1 %res +}