From cc5fc44d0203c25729c631e273a45efdbb7b5d98 Mon Sep 17 00:00:00 2001 From: Dale Johannesen Date: Wed, 11 Feb 2009 19:19:41 +0000 Subject: [PATCH] Make a transformation added in 63266 a bit less aggressive. It was transforming (x&y)==y to (x&y)!=0 in the case where y is variable and known to have at most one bit set (e.g. z&1). This is not correct; the expressions are not equivalent when y==0. I believe this patch salvages what can be salvaged, including all the cases in bt.ll. Dan, please review. Fixes gcc.c-torture/execute/20040709-[12].c llvm-svn: 64314 --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 14 ++-- llvm/test/CodeGen/X86/nobt.ll | 70 +++++++++++++++++++ 2 files changed, 79 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/X86/nobt.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index d7008923c159..fce57f93b7f8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1346,20 +1346,21 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, return 1; } -static bool ValueHasAtMostOneBitSet(SDValue Val, const SelectionDAG &DAG) { +static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { // Logical shift right or left won't ever introduce new set bits. // We check for this case because we don't care which bits are // set, but ComputeMaskedBits won't know anything unless it can // determine which specific bits may be set. if (Val.getOpcode() == ISD::SHL || Val.getOpcode() == ISD::SRL) - return ValueHasAtMostOneBitSet(Val.getOperand(0), DAG); + return ValueHasExactlyOneBitSet(Val.getOperand(0), DAG); MVT OpVT = Val.getValueType(); unsigned BitWidth = OpVT.getSizeInBits(); APInt Mask = APInt::getAllOnesValue(BitWidth); APInt KnownZero, KnownOne; DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne); - return KnownZero.countPopulation() == BitWidth - 1; + return (KnownZero.countPopulation() == BitWidth - 1) && + (KnownOne.countPopulation() == 1); } /// SimplifySetCC - Try to simplify a setcc built with the specified operands @@ -1832,9 +1833,12 @@ TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1, } // Simplify x&y == y to x&y != 0 if y has exactly one bit set. + // Note that where y is variable and is known to have at most + // one bit set (for example, if it is z&1) we cannot do this; + // the expressions are not equivalent when y==0. if (N0.getOpcode() == ISD::AND) if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) { - if (ValueHasAtMostOneBitSet(N1, DAG)) { + if (ValueHasExactlyOneBitSet(N1, DAG)) { Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); SDValue Zero = DAG.getConstant(0, N1.getValueType()); return DAG.getSetCC(dl, VT, N0, Zero, Cond); @@ -1842,7 +1846,7 @@ TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1, } if (N1.getOpcode() == ISD::AND) if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) { - if (ValueHasAtMostOneBitSet(N0, DAG)) { + if (ValueHasExactlyOneBitSet(N0, DAG)) { Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); SDValue Zero = DAG.getConstant(0, N0.getValueType()); return DAG.getSetCC(dl, VT, N1, Zero, Cond); diff --git a/llvm/test/CodeGen/X86/nobt.ll b/llvm/test/CodeGen/X86/nobt.ll new file mode 100644 index 000000000000..55294280f5c8 --- /dev/null +++ b/llvm/test/CodeGen/X86/nobt.ll @@ -0,0 +1,70 @@ +; RUN: llvm-as < %s | llc -march=x86 | not grep btl + +; This tests some cases where BT must not be generated. See also bt.ll. +; Fixes 20040709-[12].c in gcc testsuite. + +define void @test2(i32 %x, i32 %n) nounwind { +entry: + %tmp1 = and i32 %x, 1 + %tmp2 = urem i32 %tmp1, 15 + %tmp3 = and i32 %tmp2, 1 ; [#uses=1] + %tmp4 = icmp eq i32 %tmp3, %tmp2 ; [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @test3(i32 %x, i32 %n) nounwind { +entry: + %tmp1 = and i32 %x, 1 + %tmp2 = urem i32 %tmp1, 15 + %tmp3 = and i32 %tmp2, 1 ; [#uses=1] + %tmp4 = icmp eq i32 %tmp2, %tmp3 ; [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @test4(i32 %x, i32 %n) nounwind { +entry: + %tmp1 = and i32 %x, 1 + %tmp2 = urem i32 %tmp1, 15 + %tmp3 = and i32 %tmp2, 1 ; [#uses=1] + %tmp4 = icmp ne i32 %tmp2, %tmp3 ; [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define void @test5(i32 %x, i32 %n) nounwind { +entry: + %tmp1 = and i32 %x, 1 + %tmp2 = urem i32 %tmp1, 15 + %tmp3 = and i32 %tmp2, 1 ; [#uses=1] + %tmp4 = icmp ne i32 %tmp2, %tmp3 ; [#uses=1] + br i1 %tmp4, label %bb, label %UnifiedReturnBlock + +bb: ; preds = %entry + call void @foo() + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +declare void @foo()