forked from OSchip/llvm-project
[SDAG] try to fold one-demanded-bit-of-multiply
This is a translation of the transform added to InstCombine with: D118539
This commit is contained in:
parent
6a929492a6
commit
d1ecfaa097
|
@ -2265,6 +2265,19 @@ bool TargetLowering::SimplifyDemandedBits(
|
|||
break;
|
||||
}
|
||||
case ISD::MUL:
|
||||
if (DemandedBits.isPowerOf2()) {
|
||||
// The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
|
||||
// If we demand exactly one bit N and we have "X * (C' << N)" where C' is
|
||||
// odd (has LSB set), then the left-shifted low bit of X is the answer.
|
||||
unsigned CTZ = DemandedBits.countTrailingZeros();
|
||||
ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
|
||||
if (C && C->getAPIntValue().countTrailingZeros() == CTZ) {
|
||||
EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
|
||||
SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
|
||||
SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
|
||||
return TLO.CombineTo(Op, Shl);
|
||||
}
|
||||
}
|
||||
// For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
|
||||
// X * X is odd iff X is odd.
|
||||
// 'Quadratic Reciprocity': X * X -> 0 for bit[1]
|
||||
|
|
|
@ -66,7 +66,7 @@ define <4 x i32> @combine_mul_self_demandedbits_vector(<4 x i32> %x) {
|
|||
define i8 @one_demanded_bit(i8 %x) {
|
||||
; CHECK-LABEL: one_demanded_bit:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: neg w8, w0, lsl #6
|
||||
; CHECK-NEXT: lsl w8, w0, #6
|
||||
; CHECK-NEXT: orr w0, w8, #0xffffffbf
|
||||
; CHECK-NEXT: ret
|
||||
%m = mul i8 %x, 192 ; 0b1100_0000
|
||||
|
@ -77,16 +77,9 @@ define i8 @one_demanded_bit(i8 %x) {
|
|||
define <2 x i64> @one_demanded_bit_splat(<2 x i64> %x) {
|
||||
; CHECK-LABEL: one_demanded_bit_splat:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fmov x8, d0
|
||||
; CHECK-NEXT: mov x9, v0.d[1]
|
||||
; CHECK-NEXT: add x8, x8, x8, lsl #2
|
||||
; CHECK-NEXT: lsl x8, x8, #5
|
||||
; CHECK-NEXT: add x9, x9, x9, lsl #2
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: lsl x8, x9, #5
|
||||
; CHECK-NEXT: mov w9, #32
|
||||
; CHECK-NEXT: mov v0.d[1], x8
|
||||
; CHECK-NEXT: dup v1.2d, x9
|
||||
; CHECK-NEXT: mov w8, #32
|
||||
; CHECK-NEXT: shl v0.2d, v0.2d, #5
|
||||
; CHECK-NEXT: dup v1.2d, x8
|
||||
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
|
||||
; CHECK-NEXT: ret
|
||||
%m = mul <2 x i64> %x, <i64 160, i64 160> ; 0b1010_0000
|
||||
|
@ -97,8 +90,7 @@ define <2 x i64> @one_demanded_bit_splat(<2 x i64> %x) {
|
|||
define i32 @one_demanded_low_bit(i32 %x) {
|
||||
; CHECK-LABEL: one_demanded_low_bit:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: neg w8, w0
|
||||
; CHECK-NEXT: and w0, w8, #0x1
|
||||
; CHECK-NEXT: and w0, w0, #0x1
|
||||
; CHECK-NEXT: ret
|
||||
%m = mul i32 %x, -63 ; any odd number will do
|
||||
%r = and i32 %m, 1
|
||||
|
|
Loading…
Reference in New Issue