[SDAG] try to replace subtract-from-constant with xor

This is almost the same as the abandoned D48529, but it allows splat vector
constants too. This replaces the x86-specific code that was added with the
alternate patch D48557 with the original generic combine.

This transform is a less restricted form of an existing InstCombine and the
proposed SDAG equivalent for that in D128080:
https://alive2.llvm.org/ce/z/OUm6N_

Differential Revision: https://reviews.llvm.org/D128123

parent fee77a2073
commit 8b75671314
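A minimal standalone C++ sketch (not part of the patch) of why the rewrite is sound: when the constant C has a one in every bit position where the subtrahend X can possibly be one, the subtraction never borrows, so C - X and C ^ X agree. The values mirror the masked_sub_i8 test updated below; everything else is illustrative.

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // X = x & 5 can only have bits 0 and 2 set, and C = 7 (0b111) covers both,
  // so no bit of the subtraction ever needs to borrow from its neighbor.
  for (unsigned x = 0; x < 256; ++x) {
    const uint8_t X = static_cast<uint8_t>(x) & 5;
    const uint8_t C = 7;
    assert(static_cast<uint8_t>(C - X) == static_cast<uint8_t>(C ^ X));
  }
  return 0;
}
```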
@@ -3762,6 +3762,14 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
    }
  }

  // If there's no chance any bit will need to borrow from an adjacent bit:
  // sub C, X --> xor X, C
  if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
    if (!C0->isOpaque() &&
        (~DAG.computeKnownBits(N1).Zero).isSubsetOf(C0->getAPIntValue()))
      return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
  }

  return SDValue();
}

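The guard above mirrors the predicate the removed x86 pattern used. A hedged plain-integer sketch of the same condition (the function and parameter names are illustrative, not from the patch): a bit can only need to borrow if X might be one where C is zero, so the combine is allowed exactly when every possibly-set bit of N1 is also set in the constant.

```cpp
#include <cstdint>

// c         : the constant operand of "sub c, x".
// knownZero : bits proven to be zero in x (plain-integer analogue of
//             DAG.computeKnownBits(N1).Zero).
// Returns true when "c - x" can be rewritten as "x ^ c": every bit that may
// be one in x must already be one in c, i.e. (~knownZero).isSubsetOf(c).
bool subIsXor(uint32_t c, uint32_t knownZero) {
  const uint32_t possibleOnes = ~knownZero; // bits that might be set in x
  return (possibleOnes & ~c) == 0;          // none of them hits a zero bit of c
}
```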
@@ -1531,44 +1531,6 @@ def : Pat<(xor GR32:$src1, -2147483648),
            (ADD32ri GR32:$src1, -2147483648)>;
}

//===----------------------------------------------------------------------===//
// Pattern match SUB as XOR
//===----------------------------------------------------------------------===//

// An immediate in the LHS of a subtract can't be encoded in the instruction.
// If there is no possibility of a borrow we can use an XOR instead of a SUB
// to enable the immediate to be folded.
// TODO: Move this to a DAG combine?

def sub_is_xor : PatFrag<(ops node:$lhs, node:$rhs), (sub node:$lhs, node:$rhs),[{
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
    KnownBits Known = CurDAG->computeKnownBits(N->getOperand(1));

    // If all possible ones in the RHS are set in the LHS then there can't be
    // a borrow and we can use xor.
    return (~Known.Zero).isSubsetOf(CN->getAPIntValue());
  }

  return false;
}]>;

let AddedComplexity = 5 in {
def : Pat<(sub_is_xor imm:$src2, GR8:$src1),
          (XOR8ri GR8:$src1, imm:$src2)>;
def : Pat<(sub_is_xor i16immSExt8:$src2, GR16:$src1),
          (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
def : Pat<(sub_is_xor imm:$src2, GR16:$src1),
          (XOR16ri GR16:$src1, imm:$src2)>;
def : Pat<(sub_is_xor i32immSExt8:$src2, GR32:$src1),
          (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
def : Pat<(sub_is_xor imm:$src2, GR32:$src1),
          (XOR32ri GR32:$src1, imm:$src2)>;
def : Pat<(sub_is_xor i64immSExt8:$src2, GR64:$src1),
          (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(sub_is_xor i64immSExt32:$src2, GR64:$src1),
          (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
}

//===----------------------------------------------------------------------===//
// Some peepholes
//===----------------------------------------------------------------------===//

@@ -18,9 +18,8 @@ define i8 @masked_sub_i8(i8 %x) {
; CHECK-LABEL: masked_sub_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #5
; CHECK-NEXT: mov w9, #7
; CHECK-NEXT: and w8, w0, w8
; CHECK-NEXT: sub w0, w9, w8
; CHECK-NEXT: eor w0, w8, #0x7
; CHECK-NEXT: ret
  %a = and i8 %x, 5
  %m = sub i8 7, %a

@@ -43,9 +42,8 @@ define i32 @masked_sub_i32(i32 %x) {
; CHECK-LABEL: masked_sub_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #9
; CHECK-NEXT: mov w9, #31
; CHECK-NEXT: and w8, w0, w8
; CHECK-NEXT: sub w0, w9, w8
; CHECK-NEXT: eor w0, w8, #0x1f
; CHECK-NEXT: ret
  %a = and i32 %x, 9
  %m = sub i32 31, %a

@@ -58,7 +56,7 @@ define <4 x i32> @masked_sub_v4i32(<4 x i32> %x) {
; CHECK-NEXT: movi v1.4s, #42
; CHECK-NEXT: movi v2.4s, #1, msl #8
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sub v0.4s, v2.4s, v0.4s
; CHECK-NEXT: eor v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
  %a = and <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
  %m = sub <4 x i32> <i32 511, i32 511, i32 511, i32 511>, %a

@@ -248,38 +248,40 @@ define amdgpu_kernel void @add_x_shl_max_offset() #1 {
  ret void
}

; this could have the offset transform, but sub became xor

define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_alt() #1 {
; CI-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT: v_sub_i32_e32 v0, vcc, 0, v0
; CI-NEXT: v_xor_b32_e32 v0, 0xffff, v0
; CI-NEXT: v_mov_b32_e32 v1, 13
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: ds_write_b8 v0, v1 offset:65535
; CI-NEXT: ds_write_b8 v0, v1
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0
; GFX9-NEXT: v_xor_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 13
; GFX9-NEXT: ds_write_b8 v0, v1 offset:65535
; GFX9-NEXT: ds_write_b8 v0, v1
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: v_mov_b32_e32 v1, 13
; GFX10-NEXT: v_sub_nc_u32_e32 v0, 0, v0
; GFX10-NEXT: ds_write_b8 v0, v1 offset:65535
; GFX10-NEXT: v_xor_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: ds_write_b8 v0, v1
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0, v0
; GFX11-NEXT: ds_store_b8 v0, v1 offset:65535
; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: ds_store_b8 v0, v1
; GFX11-NEXT: s_endpgm
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x()
  %.neg = mul i32 %x.i, -4

@@ -290,38 +292,40 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_alt() #1 {
  ret void
}

; this could have the offset transform, but sub became xor

define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_not_canonical() #1 {
; CI-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CI-NEXT: v_sub_i32_e32 v0, vcc, 0, v0
; CI-NEXT: v_xor_b32_e32 v0, 0xffff, v0
; CI-NEXT: v_mov_b32_e32 v1, 13
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: ds_write_b8 v0, v1 offset:65535
; CI-NEXT: ds_write_b8 v0, v1
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0
; GFX9-NEXT: v_xor_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 13
; GFX9-NEXT: ds_write_b8 v0, v1 offset:65535
; GFX9-NEXT: ds_write_b8 v0, v1
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: v_mov_b32_e32 v1, 13
; GFX10-NEXT: v_sub_nc_u32_e32 v0, 0, v0
; GFX10-NEXT: ds_write_b8 v0, v1 offset:65535
; GFX10-NEXT: v_xor_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: ds_write_b8 v0, v1
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0, v0
; GFX11-NEXT: ds_store_b8 v0, v1 offset:65535
; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: ds_store_b8 v0, v1
; GFX11-NEXT: s_endpgm
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i

@@ -15,10 +15,10 @@ define i32 @f() {
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: ds_read_b32 v0, v0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; CHECK-NEXT: s_cmpk_lg_u32 vcc_lo, 0x0
; CHECK-NEXT: s_subb_u32 s4, 1, 0
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, s4, vcc_lo
; CHECK-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo
; CHECK-NEXT: s_setpc_b64 s[30:31]
bb:
  %i = load i32, i32 addrspace(3)* null, align 16

@@ -58,18 +58,18 @@ define i32 @usub_overflow(i32 %a, i32 %b) #0 {
; ARM: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
; ARM: mov r[[R2:[0-9]+]], #0
; ARM: adc r[[R0]], r[[R2]], #0
; ARM: rsb r[[R0]], r[[R0]], #1
; ARM: eor r[[R0]], r[[R0]], #1

; THUMBV6: movs r[[R2:[0-9]+]], #0
; THUMBV6: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
; THUMBV6: adcs r[[R2]], r[[R2]]
; THUMBV6: movs r[[R0]], #1
; THUMBV6: subs r[[R0]], r[[R0]], r[[R2]]
; THUMBV6: eors r[[R0]], r[[R2]]

; THUMBV7: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
; THUMBV7: mov.w r[[R2:[0-9]+]], #0
; THUMBV7: adc r[[R0]], r[[R2]], #0
; THUMBV7: rsb.w r[[R0]], r[[R0]], #1
; THUMBV7: eor r[[R0]], r[[R0]], #1

; We should know that the overflow is just 1 bit,
; no need to clear any other bit

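The comment above notes that the overflow flag here is a single bit. A tiny C++ sketch (illustrative only, not derived from the test) of why subtracting that bit from 1 and xor-ing it with 1 give the same value, so neither form needs to clear any higher bits:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // ov models the carry/overflow flag from the unsigned subtract:
  // it is known to be 0 or 1, so "1 - ov" can never borrow and
  // therefore matches "1 ^ ov" bit for bit.
  for (uint32_t ov = 0; ov <= 1; ++ov)
    assert(1u - ov == (1u ^ ov));
  return 0;
}
```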
@@ -48,7 +48,7 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; CHECK-T1-NEXT: mov r0, r1
; CHECK-T1-NEXT: adcs r0, r1
; CHECK-T1-NEXT: movs r3, #1
; CHECK-T1-NEXT: subs r3, r3, r0
; CHECK-T1-NEXT: eors r3, r0
; CHECK-T1-NEXT: mov r0, r1
; CHECK-T1-NEXT: beq .LBB1_3
; CHECK-T1-NEXT: @ %bb.1:

@@ -70,7 +70,7 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; CHECK-T2-NEXT: mov.w r12, #0
; CHECK-T2-NEXT: sbcs r1, r3
; CHECK-T2-NEXT: adc r2, r12, #0
; CHECK-T2-NEXT: rsbs.w r2, r2, #1
; CHECK-T2-NEXT: eors r2, r2, #1
; CHECK-T2-NEXT: itt ne
; CHECK-T2-NEXT: movne r0, #0
; CHECK-T2-NEXT: movne r1, #0

@@ -82,7 +82,7 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; CHECK-ARM-NEXT: mov r12, #0
; CHECK-ARM-NEXT: sbcs r1, r1, r3
; CHECK-ARM-NEXT: adc r2, r12, #0
; CHECK-ARM-NEXT: rsbs r2, r2, #1
; CHECK-ARM-NEXT: eors r2, r2, #1
; CHECK-ARM-NEXT: movwne r0, #0
; CHECK-ARM-NEXT: movwne r1, #0
; CHECK-ARM-NEXT: bx lr

@@ -54,7 +54,7 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
; CHECK-T1-NEXT: mov r0, r1
; CHECK-T1-NEXT: adcs r0, r1
; CHECK-T1-NEXT: movs r4, #1
; CHECK-T1-NEXT: subs r4, r4, r0
; CHECK-T1-NEXT: eors r4, r0
; CHECK-T1-NEXT: mov r0, r1
; CHECK-T1-NEXT: beq .LBB1_3
; CHECK-T1-NEXT: @ %bb.1:

@@ -77,7 +77,7 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
; CHECK-T2-NEXT: subs r0, r0, r2
; CHECK-T2-NEXT: sbcs r1, r3
; CHECK-T2-NEXT: adc r2, r12, #0
; CHECK-T2-NEXT: rsbs.w r2, r2, #1
; CHECK-T2-NEXT: eors r2, r2, #1
; CHECK-T2-NEXT: itt ne
; CHECK-T2-NEXT: movne r0, #0
; CHECK-T2-NEXT: movne r1, #0

@@ -91,7 +91,7 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
; CHECK-ARM-NEXT: subs r0, r0, r2
; CHECK-ARM-NEXT: sbcs r1, r1, r3
; CHECK-ARM-NEXT: adc r2, r12, #0
; CHECK-ARM-NEXT: rsbs r2, r2, #1
; CHECK-ARM-NEXT: eors r2, r2, #1
; CHECK-ARM-NEXT: movwne r0, #0
; CHECK-ARM-NEXT: movwne r1, #0
; CHECK-ARM-NEXT: bx lr

@@ -45,8 +45,8 @@ define i8 @sub_zext_cmp_mask_narrower_result(i32 %x) {
define i8 @add_zext_cmp_mask_same_size_result(i8 %x) {
; CHECK-LABEL: add_zext_cmp_mask_same_size_result:
; CHECK: # %bb.0:
; CHECK-NEXT: clrlwi 3, 3, 31
; CHECK-NEXT: subfic 3, 3, 27
; CHECK-NEXT: clrldi 3, 3, 63
; CHECK-NEXT: xori 3, 3, 27
; CHECK-NEXT: blr
  %a = and i8 %x, 1
  %c = icmp eq i8 %a, 0

@@ -58,8 +58,8 @@ define i8 @add_zext_cmp_mask_same_size_result(i8 %x) {
define i32 @add_zext_cmp_mask_wider_result(i8 %x) {
; CHECK-LABEL: add_zext_cmp_mask_wider_result:
; CHECK: # %bb.0:
; CHECK-NEXT: clrlwi 3, 3, 31
; CHECK-NEXT: subfic 3, 3, 27
; CHECK-NEXT: clrldi 3, 3, 63
; CHECK-NEXT: xori 3, 3, 27
; CHECK-NEXT: blr
  %a = and i8 %x, 1
  %c = icmp eq i8 %a, 0

@@ -71,8 +71,8 @@ define i32 @add_zext_cmp_mask_wider_result(i8 %x) {
define i8 @add_zext_cmp_mask_narrower_result(i32 %x) {
; CHECK-LABEL: add_zext_cmp_mask_narrower_result:
; CHECK: # %bb.0:
; CHECK-NEXT: clrlwi 3, 3, 31
; CHECK-NEXT: subfic 3, 3, 43
; CHECK-NEXT: clrldi 3, 3, 63
; CHECK-NEXT: xori 3, 3, 43
; CHECK-NEXT: blr
  %a = and i32 %x, 1
  %c = icmp eq i32 %a, 0

@@ -120,8 +120,9 @@ define i16 @low_bit_select_constants_bigger_false_narrower_result(i32 %x) {
define i8 @low_bit_select_constants_bigger_true_same_size_result(i8 %x) {
; CHECK-LABEL: low_bit_select_constants_bigger_true_same_size_result:
; CHECK: # %bb.0:
; CHECK-NEXT: li 4, -29
; CHECK-NEXT: clrldi 3, 3, 63
; CHECK-NEXT: subfic 3, 3, -29
; CHECK-NEXT: xor 3, 3, 4
; CHECK-NEXT: blr
  %a = and i8 %x, 1
  %c = icmp eq i8 %a, 0

@@ -133,7 +134,7 @@ define i32 @low_bit_select_constants_bigger_true_wider_result(i8 %x) {
; CHECK-LABEL: low_bit_select_constants_bigger_true_wider_result:
; CHECK: # %bb.0:
; CHECK-NEXT: clrldi 3, 3, 63
; CHECK-NEXT: subfic 3, 3, 227
; CHECK-NEXT: xori 3, 3, 227
; CHECK-NEXT: blr
  %a = and i8 %x, 1
  %c = icmp eq i8 %a, 0

@@ -145,7 +146,7 @@ define i8 @low_bit_select_constants_bigger_true_narrower_result(i16 %x) {
; CHECK-LABEL: low_bit_select_constants_bigger_true_narrower_result:
; CHECK: # %bb.0:
; CHECK-NEXT: clrldi 3, 3, 63
; CHECK-NEXT: subfic 3, 3, 41
; CHECK-NEXT: xori 3, 3, 41
; CHECK-NEXT: blr
  %a = and i16 %x, 1
  %c = icmp eq i16 %a, 0

@@ -495,7 +495,7 @@ define i8 @sel_constants_urem_constant(i1 %cond) {
; ALL-LABEL: sel_constants_urem_constant:
; ALL: # %bb.0:
; ALL-NEXT: clrldi 3, 3, 63
; ALL-NEXT: subfic 3, 3, 3
; ALL-NEXT: xori 3, 3, 3
; ALL-NEXT: blr
  %sel = select i1 %cond, i8 -4, i8 23
  %bo = urem i8 %sel, 5

@@ -530,7 +530,7 @@ define i8 @sel_constants_and_constant(i1 %cond) {
; ALL-LABEL: sel_constants_and_constant:
; ALL: # %bb.0:
; ALL-NEXT: clrldi 3, 3, 63
; ALL-NEXT: subfic 3, 3, 5
; ALL-NEXT: xori 3, 3, 5
; ALL-NEXT: blr
  %sel = select i1 %cond, i8 -4, i8 23
  %bo = and i8 %sel, 5

@@ -2051,8 +2051,7 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind {
; RV32IA-NEXT: slli a1, a1, 24
; RV32IA-NEXT: srai a1, a1, 24
; RV32IA-NEXT: sll a1, a1, a0
; RV32IA-NEXT: li a5, 24
; RV32IA-NEXT: sub a3, a5, a3
; RV32IA-NEXT: xori a3, a3, 24
; RV32IA-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: lr.w a5, (a2)
; RV32IA-NEXT: and a7, a5, a4

@@ -2123,8 +2122,7 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind {
; RV64IA-NEXT: slli a1, a1, 56
; RV64IA-NEXT: srai a1, a1, 56
; RV64IA-NEXT: sllw a1, a1, a0
; RV64IA-NEXT: li a5, 56
; RV64IA-NEXT: sub a3, a5, a3
; RV64IA-NEXT: xori a3, a3, 56
; RV64IA-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1
; RV64IA-NEXT: lr.w a5, (a2)
; RV64IA-NEXT: and a7, a5, a4

@@ -2199,8 +2197,7 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind {
; RV32IA-NEXT: slli a1, a1, 24
; RV32IA-NEXT: srai a1, a1, 24
; RV32IA-NEXT: sll a1, a1, a0
; RV32IA-NEXT: li a5, 24
; RV32IA-NEXT: sub a3, a5, a3
; RV32IA-NEXT: xori a3, a3, 24
; RV32IA-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: lr.w.aq a5, (a2)
; RV32IA-NEXT: and a7, a5, a4

@@ -2271,8 +2268,7 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind {
; RV64IA-NEXT: slli a1, a1, 56
; RV64IA-NEXT: srai a1, a1, 56
; RV64IA-NEXT: sllw a1, a1, a0
; RV64IA-NEXT: li a5, 56
; RV64IA-NEXT: sub a3, a5, a3
; RV64IA-NEXT: xori a3, a3, 56
; RV64IA-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1
; RV64IA-NEXT: lr.w.aq a5, (a2)
; RV64IA-NEXT: and a7, a5, a4

@@ -2347,8 +2343,7 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind {
; RV32IA-NEXT: slli a1, a1, 24
; RV32IA-NEXT: srai a1, a1, 24
; RV32IA-NEXT: sll a1, a1, a0
; RV32IA-NEXT: li a5, 24
; RV32IA-NEXT: sub a3, a5, a3
; RV32IA-NEXT: xori a3, a3, 24
; RV32IA-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: lr.w a5, (a2)
; RV32IA-NEXT: and a7, a5, a4

@@ -2419,8 +2414,7 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind {
; RV64IA-NEXT: slli a1, a1, 56
; RV64IA-NEXT: srai a1, a1, 56
; RV64IA-NEXT: sllw a1, a1, a0
; RV64IA-NEXT: li a5, 56
; RV64IA-NEXT: sub a3, a5, a3
; RV64IA-NEXT: xori a3, a3, 56
; RV64IA-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1
; RV64IA-NEXT: lr.w a5, (a2)
; RV64IA-NEXT: and a7, a5, a4

@@ -2495,8 +2489,7 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind {
; RV32IA-NEXT: slli a1, a1, 24
; RV32IA-NEXT: srai a1, a1, 24
; RV32IA-NEXT: sll a1, a1, a0
; RV32IA-NEXT: li a5, 24
; RV32IA-NEXT: sub a3, a5, a3
; RV32IA-NEXT: xori a3, a3, 24
; RV32IA-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: lr.w.aq a5, (a2)
; RV32IA-NEXT: and a7, a5, a4

@@ -2567,8 +2560,7 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind {
; RV64IA-NEXT: slli a1, a1, 56
; RV64IA-NEXT: srai a1, a1, 56
; RV64IA-NEXT: sllw a1, a1, a0
; RV64IA-NEXT: li a5, 56
; RV64IA-NEXT: sub a3, a5, a3
; RV64IA-NEXT: xori a3, a3, 56
; RV64IA-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1
; RV64IA-NEXT: lr.w.aq a5, (a2)
; RV64IA-NEXT: and a7, a5, a4

@@ -2643,8 +2635,7 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind {
; RV32IA-NEXT: slli a1, a1, 24
; RV32IA-NEXT: srai a1, a1, 24
; RV32IA-NEXT: sll a1, a1, a0
; RV32IA-NEXT: li a5, 24
; RV32IA-NEXT: sub a3, a5, a3
; RV32IA-NEXT: xori a3, a3, 24
; RV32IA-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: lr.w.aqrl a5, (a2)
; RV32IA-NEXT: and a7, a5, a4

@@ -2715,8 +2706,7 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind {
; RV64IA-NEXT: slli a1, a1, 56
; RV64IA-NEXT: srai a1, a1, 56
; RV64IA-NEXT: sllw a1, a1, a0
; RV64IA-NEXT: li a5, 56
; RV64IA-NEXT: sub a3, a5, a3
; RV64IA-NEXT: xori a3, a3, 56
; RV64IA-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1
; RV64IA-NEXT: lr.w.aqrl a5, (a2)
; RV64IA-NEXT: and a7, a5, a4

@@ -2791,8 +2781,7 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind {
; RV32IA-NEXT: slli a1, a1, 24
; RV32IA-NEXT: srai a1, a1, 24
; RV32IA-NEXT: sll a1, a1, a0
; RV32IA-NEXT: li a5, 24
; RV32IA-NEXT: sub a3, a5, a3
; RV32IA-NEXT: xori a3, a3, 24
; RV32IA-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: lr.w a5, (a2)
; RV32IA-NEXT: and a7, a5, a4

@@ -2863,8 +2852,7 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind {
; RV64IA-NEXT: slli a1, a1, 56
; RV64IA-NEXT: srai a1, a1, 56
; RV64IA-NEXT: sllw a1, a1, a0
; RV64IA-NEXT: li a5, 56
; RV64IA-NEXT: sub a3, a5, a3
; RV64IA-NEXT: xori a3, a3, 56
; RV64IA-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1
; RV64IA-NEXT: lr.w a5, (a2)
; RV64IA-NEXT: and a7, a5, a4

@@ -2939,8 +2927,7 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind {
; RV32IA-NEXT: slli a1, a1, 24
; RV32IA-NEXT: srai a1, a1, 24
; RV32IA-NEXT: sll a1, a1, a0
; RV32IA-NEXT: li a5, 24
; RV32IA-NEXT: sub a3, a5, a3
; RV32IA-NEXT: xori a3, a3, 24
; RV32IA-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: lr.w.aq a5, (a2)
; RV32IA-NEXT: and a7, a5, a4

@@ -3011,8 +2998,7 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind {
; RV64IA-NEXT: slli a1, a1, 56
; RV64IA-NEXT: srai a1, a1, 56
; RV64IA-NEXT: sllw a1, a1, a0
; RV64IA-NEXT: li a5, 56
; RV64IA-NEXT: sub a3, a5, a3
; RV64IA-NEXT: xori a3, a3, 56
; RV64IA-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1
; RV64IA-NEXT: lr.w.aq a5, (a2)
; RV64IA-NEXT: and a7, a5, a4

@@ -3087,8 +3073,7 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind {
; RV32IA-NEXT: slli a1, a1, 24
; RV32IA-NEXT: srai a1, a1, 24
; RV32IA-NEXT: sll a1, a1, a0
; RV32IA-NEXT: li a5, 24
; RV32IA-NEXT: sub a3, a5, a3
; RV32IA-NEXT: xori a3, a3, 24
; RV32IA-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: lr.w a5, (a2)
; RV32IA-NEXT: and a7, a5, a4

@@ -3159,8 +3144,7 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind {
; RV64IA-NEXT: slli a1, a1, 56
; RV64IA-NEXT: srai a1, a1, 56
; RV64IA-NEXT: sllw a1, a1, a0
; RV64IA-NEXT: li a5, 56
; RV64IA-NEXT: sub a3, a5, a3
; RV64IA-NEXT: xori a3, a3, 56
; RV64IA-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1
; RV64IA-NEXT: lr.w a5, (a2)
; RV64IA-NEXT: and a7, a5, a4

@@ -3235,8 +3219,7 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind {
; RV32IA-NEXT: slli a1, a1, 24
; RV32IA-NEXT: srai a1, a1, 24
; RV32IA-NEXT: sll a1, a1, a0
; RV32IA-NEXT: li a5, 24
; RV32IA-NEXT: sub a3, a5, a3
; RV32IA-NEXT: xori a3, a3, 24
; RV32IA-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: lr.w.aq a5, (a2)
; RV32IA-NEXT: and a7, a5, a4

@@ -3307,8 +3290,7 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind {
; RV64IA-NEXT: slli a1, a1, 56
; RV64IA-NEXT: srai a1, a1, 56
; RV64IA-NEXT: sllw a1, a1, a0
; RV64IA-NEXT: li a5, 56
; RV64IA-NEXT: sub a3, a5, a3
; RV64IA-NEXT: xori a3, a3, 56
; RV64IA-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1
; RV64IA-NEXT: lr.w.aq a5, (a2)
; RV64IA-NEXT: and a7, a5, a4

@@ -3383,8 +3365,7 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind {
; RV32IA-NEXT: slli a1, a1, 24
; RV32IA-NEXT: srai a1, a1, 24
; RV32IA-NEXT: sll a1, a1, a0
; RV32IA-NEXT: li a5, 24
; RV32IA-NEXT: sub a3, a5, a3
; RV32IA-NEXT: xori a3, a3, 24
; RV32IA-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: lr.w.aqrl a5, (a2)
; RV32IA-NEXT: and a7, a5, a4

@@ -3455,8 +3436,7 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind {
; RV64IA-NEXT: slli a1, a1, 56
; RV64IA-NEXT: srai a1, a1, 56
; RV64IA-NEXT: sllw a1, a1, a0
; RV64IA-NEXT: li a5, 56
; RV64IA-NEXT: sub a3, a5, a3
; RV64IA-NEXT: xori a3, a3, 56
; RV64IA-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1
; RV64IA-NEXT: lr.w.aqrl a5, (a2)
; RV64IA-NEXT: and a7, a5, a4

@@ -628,8 +628,7 @@ define signext i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind {
; RV32IA-NEXT: slli a1, a1, 24
; RV32IA-NEXT: srai a1, a1, 24
; RV32IA-NEXT: sll a1, a1, a0
; RV32IA-NEXT: li a5, 24
; RV32IA-NEXT: sub a3, a5, a3
; RV32IA-NEXT: xori a3, a3, 24
; RV32IA-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: lr.w a5, (a2)
; RV32IA-NEXT: and a7, a5, a4

@@ -703,8 +702,7 @@ define signext i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind {
; RV64IA-NEXT: slli a1, a1, 56
; RV64IA-NEXT: srai a1, a1, 56
; RV64IA-NEXT: sllw a1, a1, a0
; RV64IA-NEXT: li a5, 56
; RV64IA-NEXT: sub a3, a5, a3
; RV64IA-NEXT: xori a3, a3, 56
; RV64IA-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
; RV64IA-NEXT: lr.w a5, (a2)
; RV64IA-NEXT: and a7, a5, a4

@@ -782,8 +780,7 @@ define signext i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind {
; RV32IA-NEXT: slli a1, a1, 24
; RV32IA-NEXT: srai a1, a1, 24
; RV32IA-NEXT: sll a1, a1, a0
; RV32IA-NEXT: li a5, 24
; RV32IA-NEXT: sub a3, a5, a3
; RV32IA-NEXT: xori a3, a3, 24
; RV32IA-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: lr.w a5, (a2)
; RV32IA-NEXT: and a7, a5, a4

@@ -857,8 +854,7 @@ define signext i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind {
; RV64IA-NEXT: slli a1, a1, 56
; RV64IA-NEXT: srai a1, a1, 56
; RV64IA-NEXT: sllw a1, a1, a0
; RV64IA-NEXT: li a5, 56
; RV64IA-NEXT: sub a3, a5, a3
; RV64IA-NEXT: xori a3, a3, 56
; RV64IA-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
; RV64IA-NEXT: lr.w a5, (a2)
; RV64IA-NEXT: and a7, a5, a4

@@ -238,8 +238,8 @@ entry:
declare void @g(i8*)

; CHECK: expand_setcc
; CHECK: cmp %i0, 1
; CHECK: movl %xcc, 1,
; CHECK: cmp %i0, 0
; CHECK: movg %xcc, 1,
define i32 @expand_setcc(i64 %a) {
  %cond = icmp sle i64 %a, 0
  %cast2 = zext i1 %cond to i32