From 8b756713140f0661882ddd451cef7e245cf112aa Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Fri, 8 Jul 2022 08:13:14 -0400
Subject: [PATCH] [SDAG] try to replace subtract-from-constant with xor

This is almost the same as the abandoned D48529, but it allows splat
vector constants too.

This replaces the x86-specific code that was added with the alternate
patch D48557 with the original generic combine.

This transform is a less restricted form of an existing InstCombine and
the proposed SDAG equivalent for that in D128080:
https://alive2.llvm.org/ce/z/OUm6N_

Differential Revision: https://reviews.llvm.org/D128123
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  8 +++
 llvm/lib/Target/X86/X86InstrCompiler.td       | 38 ------------
 llvm/test/CodeGen/AArch64/sub1.ll             |  8 +--
 llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll     | 36 ++++++-----
 .../test/CodeGen/AMDGPU/setcc-multiple-use.ll |  6 +-
 llvm/test/CodeGen/ARM/intrinsics-overflow.ll  |  6 +-
 llvm/test/CodeGen/ARM/usub_sat.ll             |  6 +-
 llvm/test/CodeGen/ARM/usub_sat_plus.ll        |  6 +-
 llvm/test/CodeGen/PowerPC/bool-math.ll        | 19 +++---
 llvm/test/CodeGen/PowerPC/select_const.ll     |  4 +-
 llvm/test/CodeGen/RISCV/atomic-rmw.ll         | 60 +++++++------------
 llvm/test/CodeGen/RISCV/atomic-signext.ll     | 12 ++--
 llvm/test/CodeGen/SPARC/64bit.ll              |  4 +-
 13 files changed, 81 insertions(+), 132 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 07b237f7436f..66c8741c2774 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3762,6 +3762,14 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     }
   }
 
+  // If there's no chance any bit will need to borrow from an adjacent bit:
+  // sub C, X --> xor X, C
+  if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
+    if (!C0->isOpaque() &&
+        (~DAG.computeKnownBits(N1).Zero).isSubsetOf(C0->getAPIntValue()))
+      return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index a55b95960aa6..6124755ca539 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1531,44 +1531,6 @@ def : Pat<(xor GR32:$src1, -2147483648),
           (ADD32ri GR32:$src1, -2147483648)>;
 }
 
-//===----------------------------------------------------------------------===//
-// Pattern match SUB as XOR
-//===----------------------------------------------------------------------===//
-
-// An immediate in the LHS of a subtract can't be encoded in the instruction.
-// If there is no possibility of a borrow we can use an XOR instead of a SUB
-// to enable the immediate to be folded.
-// TODO: Move this to a DAG combine?
-
-def sub_is_xor : PatFrag<(ops node:$lhs, node:$rhs), (sub node:$lhs, node:$rhs),[{
-  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
-    KnownBits Known = CurDAG->computeKnownBits(N->getOperand(1));
-
-    // If all possible ones in the RHS are set in the LHS then there can't be
-    // a borrow and we can use xor.
-    return (~Known.Zero).isSubsetOf(CN->getAPIntValue());
-  }
-
-  return false;
-}]>;
-
-let AddedComplexity = 5 in {
-def : Pat<(sub_is_xor imm:$src2, GR8:$src1),
-          (XOR8ri GR8:$src1, imm:$src2)>;
-def : Pat<(sub_is_xor i16immSExt8:$src2, GR16:$src1),
-          (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(sub_is_xor imm:$src2, GR16:$src1),
-          (XOR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(sub_is_xor i32immSExt8:$src2, GR32:$src1),
-          (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
-def : Pat<(sub_is_xor imm:$src2, GR32:$src1),
-          (XOR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(sub_is_xor i64immSExt8:$src2, GR64:$src1),
-          (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(sub_is_xor i64immSExt32:$src2, GR64:$src1),
-          (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-}
-
 //===----------------------------------------------------------------------===//
 // Some peepholes
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sub1.ll b/llvm/test/CodeGen/AArch64/sub1.ll
index 688742bc907d..97dafcd69922 100644
--- a/llvm/test/CodeGen/AArch64/sub1.ll
+++ b/llvm/test/CodeGen/AArch64/sub1.ll
@@ -18,9 +18,8 @@ define i8 @masked_sub_i8(i8 %x) {
 ; CHECK-LABEL: masked_sub_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #5
-; CHECK-NEXT:    mov w9, #7
 ; CHECK-NEXT:    and w8, w0, w8
-; CHECK-NEXT:    sub w0, w9, w8
+; CHECK-NEXT:    eor w0, w8, #0x7
 ; CHECK-NEXT:    ret
   %a = and i8 %x, 5
   %m = sub i8 7, %a
@@ -43,9 +42,8 @@ define i32 @masked_sub_i32(i32 %x) {
 ; CHECK-LABEL: masked_sub_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #9
-; CHECK-NEXT:    mov w9, #31
 ; CHECK-NEXT:    and w8, w0, w8
-; CHECK-NEXT:    sub w0, w9, w8
+; CHECK-NEXT:    eor w0, w8, #0x1f
 ; CHECK-NEXT:    ret
   %a = and i32 %x, 9
   %m = sub i32 31, %a
@@ -58,7 +56,7 @@ define <4 x i32> @masked_sub_v4i32(<4 x i32> %x) {
 ; CHECK-NEXT:    movi v1.4s, #42
 ; CHECK-NEXT:    movi v2.4s, #1, msl #8
 ; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    sub v0.4s, v2.4s, v0.4s
+; CHECK-NEXT:    eor v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %a = and <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
   %m = sub <4 x i32> <i32 511, i32 511, i32 511, i32 511>, %a
diff --git a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
index 33d0710f9c50..5ff781bc8be0 100644
--- a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
@@ -248,38 +248,40 @@ define amdgpu_kernel void @add_x_shl_max_offset() #1 {
   ret void
 }
 
+; this could have the offset transform, but sub became xor
+
 define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_alt() #1 {
 ; CI-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
+; CI-NEXT:    v_xor_b32_e32 v0, 0xffff, v0
 ; CI-NEXT:    v_mov_b32_e32 v1, 13
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    ds_write_b8 v0, v1 offset:65535
+; CI-NEXT:    ds_write_b8 v0, v1
 ; CI-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    v_sub_u32_e32 v0, 0, v0
+; GFX9-NEXT:    v_xor_b32_e32 v0, 0xffff, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 13
-; GFX9-NEXT:    ds_write_b8 v0, v1 offset:65535
+; GFX9-NEXT:    ds_write_b8 v0, v1
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 13
-; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 0, v0
-; GFX10-NEXT:    ds_write_b8 v0, v1 offset:65535
+; GFX10-NEXT:    v_xor_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    ds_write_b8 v0, v1
 ; GFX10-NEXT:    s_endpgm
 ;
 ; 
GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_alt: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0, v0 -; GFX11-NEXT: ds_store_b8 v0, v1 offset:65535 +; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: ds_store_b8 v0, v1 ; GFX11-NEXT: s_endpgm %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() %.neg = mul i32 %x.i, -4 @@ -290,38 +292,40 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_alt() #1 { ret void } +; this could have the offset transform, but sub became xor + define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_not_canonical() #1 { ; CI-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical: ; CI: ; %bb.0: ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; CI-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 +; CI-NEXT: v_xor_b32_e32 v0, 0xffff, v0 ; CI-NEXT: v_mov_b32_e32 v1, 13 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: ds_write_b8 v0, v1 offset:65535 +; CI-NEXT: ds_write_b8 v0, v1 ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0 +; GFX9-NEXT: v_xor_b32_e32 v0, 0xffff, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 13 -; GFX9-NEXT: ds_write_b8 v0, v1 offset:65535 +; GFX9-NEXT: ds_write_b8 v0, v1 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, 13 -; GFX10-NEXT: v_sub_nc_u32_e32 v0, 0, v0 -; GFX10-NEXT: ds_write_b8 v0, v1 offset:65535 +; GFX10-NEXT: v_xor_b32_e32 v0, 0xffff, v0 +; GFX10-NEXT: ds_write_b8 v0, v1 ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0, v0 -; GFX11-NEXT: ds_store_b8 v0, v1 offset:65535 +; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: ds_store_b8 v0, v1 ; GFX11-NEXT: s_endpgm %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0 %neg = sub i32 0, %x.i diff --git a/llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll b/llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll index bff4c0c1533a..f1d36ebec3ad 100644 --- a/llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll +++ b/llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll @@ -15,10 +15,10 @@ define i32 @f() { ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: ds_read_b32 v0, v0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; CHECK-NEXT: s_cmpk_lg_u32 vcc_lo, 0x0 -; CHECK-NEXT: s_subb_u32 s4, 1, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, s4, vcc_lo +; CHECK-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] bb: %i = load i32, i32 addrspace(3)* null, align 16 diff --git a/llvm/test/CodeGen/ARM/intrinsics-overflow.ll b/llvm/test/CodeGen/ARM/intrinsics-overflow.ll index c3f64072d7d5..8bd78dd0f6ab 100644 --- a/llvm/test/CodeGen/ARM/intrinsics-overflow.ll +++ b/llvm/test/CodeGen/ARM/intrinsics-overflow.ll @@ -58,18 +58,18 @@ define i32 @usub_overflow(i32 %a, i32 %b) #0 { ; ARM: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] ; ARM: mov r[[R2:[0-9]+]], #0 ; ARM: adc r[[R0]], r[[R2]], #0 - ; ARM: rsb r[[R0]], r[[R0]], #1 + ; ARM: eor r[[R0]], r[[R0]], #1 ; THUMBV6: movs r[[R2:[0-9]+]], #0 ; THUMBV6: subs r[[R0:[0-9]+]], 
r[[R0]], r[[R1:[0-9]+]] ; THUMBV6: adcs r[[R2]], r[[R2]] ; THUMBV6: movs r[[R0]], #1 - ; THUMBV6: subs r[[R0]], r[[R0]], r[[R2]] + ; THUMBV6: eors r[[R0]], r[[R2]] ; THUMBV7: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] ; THUMBV7: mov.w r[[R2:[0-9]+]], #0 ; THUMBV7: adc r[[R0]], r[[R2]], #0 - ; THUMBV7: rsb.w r[[R0]], r[[R0]], #1 + ; THUMBV7: eor r[[R0]], r[[R0]], #1 ; We should know that the overflow is just 1 bit, ; no need to clear any other bit diff --git a/llvm/test/CodeGen/ARM/usub_sat.ll b/llvm/test/CodeGen/ARM/usub_sat.ll index c16869f4b4dd..9c2fd3966ea9 100644 --- a/llvm/test/CodeGen/ARM/usub_sat.ll +++ b/llvm/test/CodeGen/ARM/usub_sat.ll @@ -48,7 +48,7 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: adcs r0, r1 ; CHECK-T1-NEXT: movs r3, #1 -; CHECK-T1-NEXT: subs r3, r3, r0 +; CHECK-T1-NEXT: eors r3, r0 ; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: beq .LBB1_3 ; CHECK-T1-NEXT: @ %bb.1: @@ -70,7 +70,7 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; CHECK-T2-NEXT: mov.w r12, #0 ; CHECK-T2-NEXT: sbcs r1, r3 ; CHECK-T2-NEXT: adc r2, r12, #0 -; CHECK-T2-NEXT: rsbs.w r2, r2, #1 +; CHECK-T2-NEXT: eors r2, r2, #1 ; CHECK-T2-NEXT: itt ne ; CHECK-T2-NEXT: movne r0, #0 ; CHECK-T2-NEXT: movne r1, #0 @@ -82,7 +82,7 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; CHECK-ARM-NEXT: mov r12, #0 ; CHECK-ARM-NEXT: sbcs r1, r1, r3 ; CHECK-ARM-NEXT: adc r2, r12, #0 -; CHECK-ARM-NEXT: rsbs r2, r2, #1 +; CHECK-ARM-NEXT: eors r2, r2, #1 ; CHECK-ARM-NEXT: movwne r0, #0 ; CHECK-ARM-NEXT: movwne r1, #0 ; CHECK-ARM-NEXT: bx lr diff --git a/llvm/test/CodeGen/ARM/usub_sat_plus.ll b/llvm/test/CodeGen/ARM/usub_sat_plus.ll index 04494a2e4059..51ec83c70760 100644 --- a/llvm/test/CodeGen/ARM/usub_sat_plus.ll +++ b/llvm/test/CodeGen/ARM/usub_sat_plus.ll @@ -54,7 +54,7 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: adcs r0, r1 ; CHECK-T1-NEXT: movs r4, #1 -; CHECK-T1-NEXT: subs r4, r4, r0 +; CHECK-T1-NEXT: eors r4, r0 ; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: beq .LBB1_3 ; CHECK-T1-NEXT: @ %bb.1: @@ -77,7 +77,7 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; CHECK-T2-NEXT: subs r0, r0, r2 ; CHECK-T2-NEXT: sbcs r1, r3 ; CHECK-T2-NEXT: adc r2, r12, #0 -; CHECK-T2-NEXT: rsbs.w r2, r2, #1 +; CHECK-T2-NEXT: eors r2, r2, #1 ; CHECK-T2-NEXT: itt ne ; CHECK-T2-NEXT: movne r0, #0 ; CHECK-T2-NEXT: movne r1, #0 @@ -91,7 +91,7 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; CHECK-ARM-NEXT: subs r0, r0, r2 ; CHECK-ARM-NEXT: sbcs r1, r1, r3 ; CHECK-ARM-NEXT: adc r2, r12, #0 -; CHECK-ARM-NEXT: rsbs r2, r2, #1 +; CHECK-ARM-NEXT: eors r2, r2, #1 ; CHECK-ARM-NEXT: movwne r0, #0 ; CHECK-ARM-NEXT: movwne r1, #0 ; CHECK-ARM-NEXT: bx lr diff --git a/llvm/test/CodeGen/PowerPC/bool-math.ll b/llvm/test/CodeGen/PowerPC/bool-math.ll index 9e443fb0e507..2016e5ad4ffe 100644 --- a/llvm/test/CodeGen/PowerPC/bool-math.ll +++ b/llvm/test/CodeGen/PowerPC/bool-math.ll @@ -45,8 +45,8 @@ define i8 @sub_zext_cmp_mask_narrower_result(i32 %x) { define i8 @add_zext_cmp_mask_same_size_result(i8 %x) { ; CHECK-LABEL: add_zext_cmp_mask_same_size_result: ; CHECK: # %bb.0: -; CHECK-NEXT: clrlwi 3, 3, 31 -; CHECK-NEXT: subfic 3, 3, 27 +; CHECK-NEXT: clrldi 3, 3, 63 +; CHECK-NEXT: xori 3, 3, 27 ; CHECK-NEXT: blr %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -58,8 +58,8 @@ define i8 @add_zext_cmp_mask_same_size_result(i8 %x) { define i32 @add_zext_cmp_mask_wider_result(i8 %x) { ; CHECK-LABEL: add_zext_cmp_mask_wider_result: ; CHECK: # %bb.0: -; 
CHECK-NEXT: clrlwi 3, 3, 31 -; CHECK-NEXT: subfic 3, 3, 27 +; CHECK-NEXT: clrldi 3, 3, 63 +; CHECK-NEXT: xori 3, 3, 27 ; CHECK-NEXT: blr %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -71,8 +71,8 @@ define i32 @add_zext_cmp_mask_wider_result(i8 %x) { define i8 @add_zext_cmp_mask_narrower_result(i32 %x) { ; CHECK-LABEL: add_zext_cmp_mask_narrower_result: ; CHECK: # %bb.0: -; CHECK-NEXT: clrlwi 3, 3, 31 -; CHECK-NEXT: subfic 3, 3, 43 +; CHECK-NEXT: clrldi 3, 3, 63 +; CHECK-NEXT: xori 3, 3, 43 ; CHECK-NEXT: blr %a = and i32 %x, 1 %c = icmp eq i32 %a, 0 @@ -120,8 +120,9 @@ define i16 @low_bit_select_constants_bigger_false_narrower_result(i32 %x) { define i8 @low_bit_select_constants_bigger_true_same_size_result(i8 %x) { ; CHECK-LABEL: low_bit_select_constants_bigger_true_same_size_result: ; CHECK: # %bb.0: +; CHECK-NEXT: li 4, -29 ; CHECK-NEXT: clrldi 3, 3, 63 -; CHECK-NEXT: subfic 3, 3, -29 +; CHECK-NEXT: xor 3, 3, 4 ; CHECK-NEXT: blr %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -133,7 +134,7 @@ define i32 @low_bit_select_constants_bigger_true_wider_result(i8 %x) { ; CHECK-LABEL: low_bit_select_constants_bigger_true_wider_result: ; CHECK: # %bb.0: ; CHECK-NEXT: clrldi 3, 3, 63 -; CHECK-NEXT: subfic 3, 3, 227 +; CHECK-NEXT: xori 3, 3, 227 ; CHECK-NEXT: blr %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -145,7 +146,7 @@ define i8 @low_bit_select_constants_bigger_true_narrower_result(i16 %x) { ; CHECK-LABEL: low_bit_select_constants_bigger_true_narrower_result: ; CHECK: # %bb.0: ; CHECK-NEXT: clrldi 3, 3, 63 -; CHECK-NEXT: subfic 3, 3, 41 +; CHECK-NEXT: xori 3, 3, 41 ; CHECK-NEXT: blr %a = and i16 %x, 1 %c = icmp eq i16 %a, 0 diff --git a/llvm/test/CodeGen/PowerPC/select_const.ll b/llvm/test/CodeGen/PowerPC/select_const.ll index b63cad382393..225188fe4eec 100644 --- a/llvm/test/CodeGen/PowerPC/select_const.ll +++ b/llvm/test/CodeGen/PowerPC/select_const.ll @@ -495,7 +495,7 @@ define i8 @sel_constants_urem_constant(i1 %cond) { ; ALL-LABEL: sel_constants_urem_constant: ; ALL: # %bb.0: ; ALL-NEXT: clrldi 3, 3, 63 -; ALL-NEXT: subfic 3, 3, 3 +; ALL-NEXT: xori 3, 3, 3 ; ALL-NEXT: blr %sel = select i1 %cond, i8 -4, i8 23 %bo = urem i8 %sel, 5 @@ -530,7 +530,7 @@ define i8 @sel_constants_and_constant(i1 %cond) { ; ALL-LABEL: sel_constants_and_constant: ; ALL: # %bb.0: ; ALL-NEXT: clrldi 3, 3, 63 -; ALL-NEXT: subfic 3, 3, 5 +; ALL-NEXT: xori 3, 3, 5 ; ALL-NEXT: blr %sel = select i1 %cond, i8 -4, i8 23 %bo = and i8 %sel, 5 diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll index 62fcee85d5f9..42f8d41e527f 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -2051,8 +2051,7 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -2123,8 +2122,7 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 @@ -2199,8 +2197,7 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV32IA-NEXT: slli a1, a1, 24 ; 
RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aq a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -2271,8 +2268,7 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 @@ -2347,8 +2343,7 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind { ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -2419,8 +2414,7 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind { ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 @@ -2495,8 +2489,7 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aq a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -2567,8 +2560,7 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 @@ -2643,8 +2635,7 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aqrl a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -2715,8 +2706,7 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aqrl a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 @@ -2791,8 +2781,7 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -2863,8 +2852,7 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 
-; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 @@ -2939,8 +2927,7 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aq a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -3011,8 +2998,7 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 @@ -3087,8 +3073,7 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind { ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -3159,8 +3144,7 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind { ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 @@ -3235,8 +3219,7 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aq a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -3307,8 +3290,7 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 @@ -3383,8 +3365,7 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aqrl a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -3455,8 +3436,7 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aqrl a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll index 7b4d37985f98..c9015ac7673e 100644 --- a/llvm/test/CodeGen/RISCV/atomic-signext.ll +++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll @@ 
-628,8 +628,7 @@ define signext i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -703,8 +702,7 @@ define signext i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 @@ -782,8 +780,7 @@ define signext i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -857,8 +854,7 @@ define signext i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 diff --git a/llvm/test/CodeGen/SPARC/64bit.ll b/llvm/test/CodeGen/SPARC/64bit.ll index 274fa32fad35..c61476eb2265 100644 --- a/llvm/test/CodeGen/SPARC/64bit.ll +++ b/llvm/test/CodeGen/SPARC/64bit.ll @@ -238,8 +238,8 @@ entry: declare void @g(i8*) ; CHECK: expand_setcc -; CHECK: cmp %i0, 1 -; CHECK: movl %xcc, 1, +; CHECK: cmp %i0, 0 +; CHECK: movg %xcc, 1, define i32 @expand_setcc(i64 %a) { %cond = icmp sle i64 %a, 0 %cast2 = zext i1 %cond to i32
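
Why the guard is sufficient: when every bit that could possibly be set in X
is also set in the constant C (the `(~Known.Zero).isSubsetOf(C)` check in
visitSUB above), no bit of `C - X` can ever borrow from its neighbor, so the
subtract degenerates into a per-bit flip, i.e. `xor X, C`. The standalone C++
sketch below is not part of the patch; it mirrors the masked_sub_i8 case from
sub1.ll (X = x & 5, C = 7), with plain uint8_t masks standing in, as an
assumption, for APInt/KnownBits, and exhaustively verifies the identity:

// Illustration only (assumed values, not LLVM API): if all possible ones of
// X are a subset of the ones of C, then C - X never borrows, so it equals
// the bitwise xor of C and X.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const uint8_t C = 7;            // constant LHS of the subtract (0b111)
  const uint8_t PossibleOnes = 5; // ~Known.Zero for X = x & 5 (0b101)

  // The combine's guard: every bit that can be set in X is set in C.
  assert((PossibleOnes & ~C) == 0);

  for (unsigned x = 0; x <= 0xff; ++x) {
    const uint8_t X = static_cast<uint8_t>(x) & PossibleOnes;
    // With no borrow possible, each result bit is just C_i XOR X_i.
    assert(static_cast<uint8_t>(C - X) == static_cast<uint8_t>(C ^ X));
  }
  std::puts("sub C, X == xor X, C for every reachable X");
  return 0;
}

The same bitwise argument holds at any width and for splat vector constants,
which is what lets this one generic combine subsume the deleted x86
sub_is_xor patterns and fire on all of the targets updated above.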