llvm-project/llvm/test/CodeGen/Mips/atomic.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \
; RUN: FileCheck %s -check-prefix=MIPS32
; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \
; RUN: FileCheck %s -check-prefix=MIPS32O0
; RUN: llc -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -relocation-model=pic -verify-machineinstrs < %s | \
; RUN: FileCheck %s -check-prefix=MIPS32R2
; RUN: llc -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \
; RUN: FileCheck %s -check-prefix=MIPS32R6
; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \
; RUN: FileCheck %s -check-prefix=MIPS32R6O0
; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \
; RUN: FileCheck %s -check-prefix=MIPS4
; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \
; RUN: FileCheck %s -check-prefix=MIPS64
; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r2 -relocation-model=pic -verify-machineinstrs < %s | \
; RUN: FileCheck %s -check-prefix=MIPS64R2
; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \
; RUN: FileCheck %s -check-prefix=MIPS64R6
; RUN: llc -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \
; RUN: FileCheck %s -check-prefix=MIPS64R6O0
; RUN: llc -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -mattr=micromips -relocation-model=pic -verify-machineinstrs < %s | \
; RUN: FileCheck %s -check-prefix=MM32
; We want to verify that the produced code is well formed at all optimization levels; the rest of the tests ensure correctness.
; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O1 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O1
; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O2 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O2
; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O3 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O3
; Keep one big-endian check so that we don't reduce testing, but don't add more
; since endianness doesn't affect the body of the atomic operations.
; RUN: llc -mtriple=mips-unknown-linux-gnu --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \
; RUN: FileCheck %s -check-prefix=MIPS32EB
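; All of the functions below lower a 32-bit atomicrmw to the same
; load-linked/store-conditional retry loop; only the ALU operation in the
; loop body differs. As a rough sketch (with a generic label and an <op>
; placeholder rather than literal output), the pre-R6 pattern checked
; throughout is:
;
;   $BBn_1:
;     ll    $2, 0($1)      # load-linked the current value of x
;     <op>  $3, $2, $4     # combine it with the incoming argument
;     sc    $3, 0($1)      # store-conditional; writes 0 to $3 on failure
;     beqz  $3, $BBn_1     # retry until the sc succeeds
;     nop                  # branch delay slot
;
; R6 targets use the compact beqzc/jrc forms instead, and the PIC prologue
; first materializes the GOT pointer (via _gp_disp on 32-bit targets and
; %gp_rel(...) on 64-bit targets) before loading the address of x with
; %got/%got_disp.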
@x = common global i32 0, align 4
define i32 @AtomicLoadAdd32(i32 signext %incr) nounwind {
; MIPS32-LABEL: AtomicLoadAdd32:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-NEXT: addu $1, $2, $25
; MIPS32-NEXT: lw $1, %got(x)($1)
; MIPS32-NEXT: $BB0_1: # %entry
; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32-NEXT: ll $2, 0($1)
; MIPS32-NEXT: addu $3, $2, $4
; MIPS32-NEXT: sc $3, 0($1)
; MIPS32-NEXT: beqz $3, $BB0_1
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %entry
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32O0-LABEL: AtomicLoadAdd32:
; MIPS32O0: # %bb.0: # %entry
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: lw $1, %got(x)($1)
; MIPS32O0-NEXT: $BB0_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32O0-NEXT: ll $2, 0($1)
; MIPS32O0-NEXT: addu $3, $2, $4
; MIPS32O0-NEXT: sc $3, 0($1)
; MIPS32O0-NEXT: beqz $3, $BB0_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
; MIPS32R2-LABEL: AtomicLoadAdd32:
; MIPS32R2: # %bb.0: # %entry
; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R2-NEXT: addu $1, $2, $25
; MIPS32R2-NEXT: lw $1, %got(x)($1)
; MIPS32R2-NEXT: $BB0_1: # %entry
; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R2-NEXT: ll $2, 0($1)
; MIPS32R2-NEXT: addu $3, $2, $4
; MIPS32R2-NEXT: sc $3, 0($1)
; MIPS32R2-NEXT: beqz $3, $BB0_1
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: # %bb.2: # %entry
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: nop
;
; MIPS32R6-LABEL: AtomicLoadAdd32:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-NEXT: addu $1, $2, $25
; MIPS32R6-NEXT: lw $1, %got(x)($1)
; MIPS32R6-NEXT: $BB0_1: # %entry
; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6-NEXT: ll $2, 0($1)
; MIPS32R6-NEXT: addu $3, $2, $4
; MIPS32R6-NEXT: sc $3, 0($1)
; MIPS32R6-NEXT: beqzc $3, $BB0_1
; MIPS32R6-NEXT: nop
; MIPS32R6-NEXT: # %bb.2: # %entry
; MIPS32R6-NEXT: jrc $ra
;
; MIPS32R6O0-LABEL: AtomicLoadAdd32:
; MIPS32R6O0: # %bb.0: # %entry
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: lw $1, %got(x)($1)
; MIPS32R6O0-NEXT: $BB0_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6O0-NEXT: ll $2, 0($1)
; MIPS32R6O0-NEXT: addu $3, $2, $4
; MIPS32R6O0-NEXT: sc $3, 0($1)
; MIPS32R6O0-NEXT: beqzc $3, $BB0_1
; MIPS32R6O0-NEXT: nop
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: AtomicLoadAdd32:
; MIPS4: # %bb.0: # %entry
; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32)))
; MIPS4-NEXT: daddu $1, $1, $25
; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32)))
; MIPS4-NEXT: ld $1, %got_disp(x)($1)
; MIPS4-NEXT: .LBB0_1: # %entry
; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS4-NEXT: ll $2, 0($1)
; MIPS4-NEXT: addu $3, $2, $4
; MIPS4-NEXT: sc $3, 0($1)
; MIPS4-NEXT: beqz $3, .LBB0_1
; MIPS4-NEXT: nop
; MIPS4-NEXT: # %bb.2: # %entry
; MIPS4-NEXT: jr $ra
; MIPS4-NEXT: nop
;
; MIPS64-LABEL: AtomicLoadAdd32:
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32)))
; MIPS64-NEXT: daddu $1, $1, $25
; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32)))
; MIPS64-NEXT: ld $1, %got_disp(x)($1)
; MIPS64-NEXT: .LBB0_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64-NEXT: ll $2, 0($1)
; MIPS64-NEXT: addu $3, $2, $4
; MIPS64-NEXT: sc $3, 0($1)
; MIPS64-NEXT: beqz $3, .LBB0_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: nop
;
; MIPS64R2-LABEL: AtomicLoadAdd32:
; MIPS64R2: # %bb.0: # %entry
; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32)))
; MIPS64R2-NEXT: daddu $1, $1, $25
; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32)))
; MIPS64R2-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R2-NEXT: .LBB0_1: # %entry
; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R2-NEXT: ll $2, 0($1)
; MIPS64R2-NEXT: addu $3, $2, $4
; MIPS64R2-NEXT: sc $3, 0($1)
; MIPS64R2-NEXT: beqz $3, .LBB0_1
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: # %bb.2: # %entry
; MIPS64R2-NEXT: jr $ra
; MIPS64R2-NEXT: nop
;
; MIPS64R6-LABEL: AtomicLoadAdd32:
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32)))
; MIPS64R6-NEXT: daddu $1, $1, $25
; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32)))
; MIPS64R6-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6-NEXT: .LBB0_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6-NEXT: ll $2, 0($1)
; MIPS64R6-NEXT: addu $3, $2, $4
; MIPS64R6-NEXT: sc $3, 0($1)
; MIPS64R6-NEXT: beqzc $3, .LBB0_1
; MIPS64R6-NEXT: nop
; MIPS64R6-NEXT: # %bb.2: # %entry
; MIPS64R6-NEXT: jrc $ra
;
; MIPS64R6O0-LABEL: AtomicLoadAdd32:
; MIPS64R6O0: # %bb.0: # %entry
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB0_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6O0-NEXT: ll $2, 0($1)
; MIPS64R6O0-NEXT: addu $3, $2, $4
; MIPS64R6O0-NEXT: sc $3, 0($1)
; MIPS64R6O0-NEXT: beqzc $3, .LBB0_1
; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: AtomicLoadAdd32:
; MM32: # %bb.0: # %entry
; MM32-NEXT: lui $2, %hi(_gp_disp)
; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MM32-NEXT: addu $2, $2, $25
; MM32-NEXT: lw $1, %got(x)($2)
; MM32-NEXT: $BB0_1: # %entry
; MM32-NEXT: # =>This Inner Loop Header: Depth=1
; MM32-NEXT: ll $2, 0($1)
; MM32-NEXT: addu16 $3, $2, $4
; MM32-NEXT: sc $3, 0($1)
; MM32-NEXT: beqzc $3, $BB0_1
; MM32-NEXT: # %bb.2: # %entry
; MM32-NEXT: jrc $ra
;
; O1-LABEL: AtomicLoadAdd32:
; O1: # %bb.0: # %entry
; O1-NEXT: lui $2, %hi(_gp_disp)
; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
; O1-NEXT: addu $1, $2, $25
; O1-NEXT: lw $1, %got(x)($1)
; O1-NEXT: $BB0_1: # %entry
; O1-NEXT: # =>This Inner Loop Header: Depth=1
; O1-NEXT: ll $2, 0($1)
; O1-NEXT: addu $3, $2, $4
; O1-NEXT: sc $3, 0($1)
; O1-NEXT: beqz $3, $BB0_1
; O1-NEXT: nop
; O1-NEXT: # %bb.2: # %entry
; O1-NEXT: jr $ra
; O1-NEXT: nop
;
; O2-LABEL: AtomicLoadAdd32:
; O2: # %bb.0: # %entry
; O2-NEXT: lui $2, %hi(_gp_disp)
; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
; O2-NEXT: addu $1, $2, $25
; O2-NEXT: lw $1, %got(x)($1)
; O2-NEXT: $BB0_1: # %entry
; O2-NEXT: # =>This Inner Loop Header: Depth=1
; O2-NEXT: ll $2, 0($1)
; O2-NEXT: addu $3, $2, $4
; O2-NEXT: sc $3, 0($1)
; O2-NEXT: beqz $3, $BB0_1
; O2-NEXT: nop
; O2-NEXT: # %bb.2: # %entry
; O2-NEXT: jr $ra
; O2-NEXT: nop
;
; O3-LABEL: AtomicLoadAdd32:
; O3: # %bb.0: # %entry
; O3-NEXT: lui $2, %hi(_gp_disp)
; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
; O3-NEXT: addu $1, $2, $25
; O3-NEXT: lw $1, %got(x)($1)
; O3-NEXT: $BB0_1: # %entry
; O3-NEXT: # =>This Inner Loop Header: Depth=1
; O3-NEXT: ll $2, 0($1)
; O3-NEXT: addu $3, $2, $4
; O3-NEXT: sc $3, 0($1)
; O3-NEXT: beqz $3, $BB0_1
; O3-NEXT: nop
; O3-NEXT: # %bb.2: # %entry
; O3-NEXT: jr $ra
; O3-NEXT: nop
;
; MIPS32EB-LABEL: AtomicLoadAdd32:
; MIPS32EB: # %bb.0: # %entry
; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32EB-NEXT: addu $1, $2, $25
; MIPS32EB-NEXT: lw $1, %got(x)($1)
; MIPS32EB-NEXT: $BB0_1: # %entry
; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32EB-NEXT: ll $2, 0($1)
; MIPS32EB-NEXT: addu $3, $2, $4
; MIPS32EB-NEXT: sc $3, 0($1)
; MIPS32EB-NEXT: beqz $3, $BB0_1
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: # %bb.2: # %entry
; MIPS32EB-NEXT: jr $ra
; MIPS32EB-NEXT: nop
entry:
%0 = atomicrmw add i32* @x, i32 %incr monotonic
ret i32 %0
}
define i32 @AtomicLoadSub32(i32 signext %incr) nounwind {
; MIPS32-LABEL: AtomicLoadSub32:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-NEXT: addu $1, $2, $25
; MIPS32-NEXT: lw $1, %got(x)($1)
; MIPS32-NEXT: $BB1_1: # %entry
; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32-NEXT: ll $2, 0($1)
; MIPS32-NEXT: subu $3, $2, $4
; MIPS32-NEXT: sc $3, 0($1)
; MIPS32-NEXT: beqz $3, $BB1_1
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %entry
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32O0-LABEL: AtomicLoadSub32:
; MIPS32O0: # %bb.0: # %entry
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: lw $1, %got(x)($1)
; MIPS32O0-NEXT: $BB1_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32O0-NEXT: ll $2, 0($1)
; MIPS32O0-NEXT: subu $3, $2, $4
; MIPS32O0-NEXT: sc $3, 0($1)
; MIPS32O0-NEXT: beqz $3, $BB1_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
; MIPS32R2-LABEL: AtomicLoadSub32:
; MIPS32R2: # %bb.0: # %entry
; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R2-NEXT: addu $1, $2, $25
; MIPS32R2-NEXT: lw $1, %got(x)($1)
; MIPS32R2-NEXT: $BB1_1: # %entry
; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R2-NEXT: ll $2, 0($1)
; MIPS32R2-NEXT: subu $3, $2, $4
; MIPS32R2-NEXT: sc $3, 0($1)
; MIPS32R2-NEXT: beqz $3, $BB1_1
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: # %bb.2: # %entry
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: nop
;
; MIPS32R6-LABEL: AtomicLoadSub32:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-NEXT: addu $1, $2, $25
; MIPS32R6-NEXT: lw $1, %got(x)($1)
; MIPS32R6-NEXT: $BB1_1: # %entry
; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6-NEXT: ll $2, 0($1)
; MIPS32R6-NEXT: subu $3, $2, $4
; MIPS32R6-NEXT: sc $3, 0($1)
; MIPS32R6-NEXT: beqzc $3, $BB1_1
; MIPS32R6-NEXT: nop
; MIPS32R6-NEXT: # %bb.2: # %entry
; MIPS32R6-NEXT: jrc $ra
;
; MIPS32R6O0-LABEL: AtomicLoadSub32:
; MIPS32R6O0: # %bb.0: # %entry
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: lw $1, %got(x)($1)
; MIPS32R6O0-NEXT: $BB1_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6O0-NEXT: ll $2, 0($1)
; MIPS32R6O0-NEXT: subu $3, $2, $4
; MIPS32R6O0-NEXT: sc $3, 0($1)
; MIPS32R6O0-NEXT: beqzc $3, $BB1_1
; MIPS32R6O0-NEXT: nop
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: AtomicLoadSub32:
; MIPS4: # %bb.0: # %entry
; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32)))
; MIPS4-NEXT: daddu $1, $1, $25
; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32)))
; MIPS4-NEXT: ld $1, %got_disp(x)($1)
; MIPS4-NEXT: .LBB1_1: # %entry
; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS4-NEXT: ll $2, 0($1)
; MIPS4-NEXT: subu $3, $2, $4
; MIPS4-NEXT: sc $3, 0($1)
; MIPS4-NEXT: beqz $3, .LBB1_1
; MIPS4-NEXT: nop
; MIPS4-NEXT: # %bb.2: # %entry
; MIPS4-NEXT: jr $ra
; MIPS4-NEXT: nop
;
; MIPS64-LABEL: AtomicLoadSub32:
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32)))
; MIPS64-NEXT: daddu $1, $1, $25
; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32)))
; MIPS64-NEXT: ld $1, %got_disp(x)($1)
; MIPS64-NEXT: .LBB1_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64-NEXT: ll $2, 0($1)
; MIPS64-NEXT: subu $3, $2, $4
; MIPS64-NEXT: sc $3, 0($1)
; MIPS64-NEXT: beqz $3, .LBB1_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: nop
;
; MIPS64R2-LABEL: AtomicLoadSub32:
; MIPS64R2: # %bb.0: # %entry
; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32)))
; MIPS64R2-NEXT: daddu $1, $1, $25
; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32)))
; MIPS64R2-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R2-NEXT: .LBB1_1: # %entry
; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R2-NEXT: ll $2, 0($1)
; MIPS64R2-NEXT: subu $3, $2, $4
; MIPS64R2-NEXT: sc $3, 0($1)
; MIPS64R2-NEXT: beqz $3, .LBB1_1
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: # %bb.2: # %entry
; MIPS64R2-NEXT: jr $ra
; MIPS64R2-NEXT: nop
;
; MIPS64R6-LABEL: AtomicLoadSub32:
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32)))
; MIPS64R6-NEXT: daddu $1, $1, $25
; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32)))
; MIPS64R6-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6-NEXT: .LBB1_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6-NEXT: ll $2, 0($1)
; MIPS64R6-NEXT: subu $3, $2, $4
; MIPS64R6-NEXT: sc $3, 0($1)
; MIPS64R6-NEXT: beqzc $3, .LBB1_1
; MIPS64R6-NEXT: nop
; MIPS64R6-NEXT: # %bb.2: # %entry
; MIPS64R6-NEXT: jrc $ra
;
; MIPS64R6O0-LABEL: AtomicLoadSub32:
; MIPS64R6O0: # %bb.0: # %entry
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB1_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6O0-NEXT: ll $2, 0($1)
; MIPS64R6O0-NEXT: subu $3, $2, $4
; MIPS64R6O0-NEXT: sc $3, 0($1)
; MIPS64R6O0-NEXT: beqzc $3, .LBB1_1
; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: AtomicLoadSub32:
; MM32: # %bb.0: # %entry
; MM32-NEXT: lui $2, %hi(_gp_disp)
; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MM32-NEXT: addu $2, $2, $25
; MM32-NEXT: lw $1, %got(x)($2)
; MM32-NEXT: $BB1_1: # %entry
; MM32-NEXT: # =>This Inner Loop Header: Depth=1
; MM32-NEXT: ll $2, 0($1)
; MM32-NEXT: subu16 $3, $2, $4
; MM32-NEXT: sc $3, 0($1)
; MM32-NEXT: beqzc $3, $BB1_1
; MM32-NEXT: # %bb.2: # %entry
; MM32-NEXT: jrc $ra
;
; O1-LABEL: AtomicLoadSub32:
; O1: # %bb.0: # %entry
; O1-NEXT: lui $2, %hi(_gp_disp)
; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
; O1-NEXT: addu $1, $2, $25
; O1-NEXT: lw $1, %got(x)($1)
; O1-NEXT: $BB1_1: # %entry
; O1-NEXT: # =>This Inner Loop Header: Depth=1
; O1-NEXT: ll $2, 0($1)
; O1-NEXT: subu $3, $2, $4
; O1-NEXT: sc $3, 0($1)
; O1-NEXT: beqz $3, $BB1_1
; O1-NEXT: nop
; O1-NEXT: # %bb.2: # %entry
; O1-NEXT: jr $ra
; O1-NEXT: nop
;
; O2-LABEL: AtomicLoadSub32:
; O2: # %bb.0: # %entry
; O2-NEXT: lui $2, %hi(_gp_disp)
; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
; O2-NEXT: addu $1, $2, $25
; O2-NEXT: lw $1, %got(x)($1)
; O2-NEXT: $BB1_1: # %entry
; O2-NEXT: # =>This Inner Loop Header: Depth=1
; O2-NEXT: ll $2, 0($1)
; O2-NEXT: subu $3, $2, $4
; O2-NEXT: sc $3, 0($1)
; O2-NEXT: beqz $3, $BB1_1
; O2-NEXT: nop
; O2-NEXT: # %bb.2: # %entry
; O2-NEXT: jr $ra
; O2-NEXT: nop
;
; O3-LABEL: AtomicLoadSub32:
; O3: # %bb.0: # %entry
; O3-NEXT: lui $2, %hi(_gp_disp)
; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
; O3-NEXT: addu $1, $2, $25
; O3-NEXT: lw $1, %got(x)($1)
; O3-NEXT: $BB1_1: # %entry
; O3-NEXT: # =>This Inner Loop Header: Depth=1
; O3-NEXT: ll $2, 0($1)
; O3-NEXT: subu $3, $2, $4
; O3-NEXT: sc $3, 0($1)
; O3-NEXT: beqz $3, $BB1_1
; O3-NEXT: nop
; O3-NEXT: # %bb.2: # %entry
; O3-NEXT: jr $ra
; O3-NEXT: nop
;
; MIPS32EB-LABEL: AtomicLoadSub32:
; MIPS32EB: # %bb.0: # %entry
; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32EB-NEXT: addu $1, $2, $25
; MIPS32EB-NEXT: lw $1, %got(x)($1)
; MIPS32EB-NEXT: $BB1_1: # %entry
; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32EB-NEXT: ll $2, 0($1)
; MIPS32EB-NEXT: subu $3, $2, $4
; MIPS32EB-NEXT: sc $3, 0($1)
; MIPS32EB-NEXT: beqz $3, $BB1_1
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: # %bb.2: # %entry
; MIPS32EB-NEXT: jr $ra
; MIPS32EB-NEXT: nop
entry:
%0 = atomicrmw sub i32* @x, i32 %incr monotonic
ret i32 %0
}
define i32 @AtomicLoadXor32(i32 signext %incr) nounwind {
; MIPS32-LABEL: AtomicLoadXor32:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-NEXT: addu $1, $2, $25
; MIPS32-NEXT: lw $1, %got(x)($1)
; MIPS32-NEXT: $BB2_1: # %entry
; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32-NEXT: ll $2, 0($1)
; MIPS32-NEXT: xor $3, $2, $4
; MIPS32-NEXT: sc $3, 0($1)
; MIPS32-NEXT: beqz $3, $BB2_1
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %entry
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32O0-LABEL: AtomicLoadXor32:
; MIPS32O0: # %bb.0: # %entry
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: lw $1, %got(x)($1)
; MIPS32O0-NEXT: $BB2_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32O0-NEXT: ll $2, 0($1)
; MIPS32O0-NEXT: xor $3, $2, $4
; MIPS32O0-NEXT: sc $3, 0($1)
; MIPS32O0-NEXT: beqz $3, $BB2_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
; MIPS32R2-LABEL: AtomicLoadXor32:
; MIPS32R2: # %bb.0: # %entry
; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R2-NEXT: addu $1, $2, $25
; MIPS32R2-NEXT: lw $1, %got(x)($1)
; MIPS32R2-NEXT: $BB2_1: # %entry
; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R2-NEXT: ll $2, 0($1)
; MIPS32R2-NEXT: xor $3, $2, $4
; MIPS32R2-NEXT: sc $3, 0($1)
; MIPS32R2-NEXT: beqz $3, $BB2_1
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: # %bb.2: # %entry
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: nop
;
; MIPS32R6-LABEL: AtomicLoadXor32:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-NEXT: addu $1, $2, $25
; MIPS32R6-NEXT: lw $1, %got(x)($1)
; MIPS32R6-NEXT: $BB2_1: # %entry
; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6-NEXT: ll $2, 0($1)
; MIPS32R6-NEXT: xor $3, $2, $4
; MIPS32R6-NEXT: sc $3, 0($1)
; MIPS32R6-NEXT: beqzc $3, $BB2_1
; MIPS32R6-NEXT: nop
; MIPS32R6-NEXT: # %bb.2: # %entry
; MIPS32R6-NEXT: jrc $ra
;
; MIPS32R6O0-LABEL: AtomicLoadXor32:
; MIPS32R6O0: # %bb.0: # %entry
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: lw $1, %got(x)($1)
; MIPS32R6O0-NEXT: $BB2_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6O0-NEXT: ll $2, 0($1)
; MIPS32R6O0-NEXT: xor $3, $2, $4
; MIPS32R6O0-NEXT: sc $3, 0($1)
; MIPS32R6O0-NEXT: beqzc $3, $BB2_1
; MIPS32R6O0-NEXT: nop
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: AtomicLoadXor32:
; MIPS4: # %bb.0: # %entry
; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor32)))
; MIPS4-NEXT: daddu $1, $1, $25
; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32)))
; MIPS4-NEXT: ld $1, %got_disp(x)($1)
; MIPS4-NEXT: .LBB2_1: # %entry
; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS4-NEXT: ll $2, 0($1)
; MIPS4-NEXT: xor $3, $2, $4
; MIPS4-NEXT: sc $3, 0($1)
; MIPS4-NEXT: beqz $3, .LBB2_1
; MIPS4-NEXT: nop
; MIPS4-NEXT: # %bb.2: # %entry
; MIPS4-NEXT: jr $ra
; MIPS4-NEXT: nop
;
; MIPS64-LABEL: AtomicLoadXor32:
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor32)))
; MIPS64-NEXT: daddu $1, $1, $25
; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32)))
; MIPS64-NEXT: ld $1, %got_disp(x)($1)
; MIPS64-NEXT: .LBB2_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64-NEXT: ll $2, 0($1)
; MIPS64-NEXT: xor $3, $2, $4
; MIPS64-NEXT: sc $3, 0($1)
; MIPS64-NEXT: beqz $3, .LBB2_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: nop
;
; MIPS64R2-LABEL: AtomicLoadXor32:
; MIPS64R2: # %bb.0: # %entry
; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor32)))
; MIPS64R2-NEXT: daddu $1, $1, $25
; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32)))
; MIPS64R2-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R2-NEXT: .LBB2_1: # %entry
; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R2-NEXT: ll $2, 0($1)
; MIPS64R2-NEXT: xor $3, $2, $4
; MIPS64R2-NEXT: sc $3, 0($1)
; MIPS64R2-NEXT: beqz $3, .LBB2_1
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: # %bb.2: # %entry
; MIPS64R2-NEXT: jr $ra
; MIPS64R2-NEXT: nop
;
; MIPS64R6-LABEL: AtomicLoadXor32:
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor32)))
; MIPS64R6-NEXT: daddu $1, $1, $25
; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32)))
; MIPS64R6-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6-NEXT: .LBB2_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6-NEXT: ll $2, 0($1)
; MIPS64R6-NEXT: xor $3, $2, $4
; MIPS64R6-NEXT: sc $3, 0($1)
; MIPS64R6-NEXT: beqzc $3, .LBB2_1
; MIPS64R6-NEXT: nop
; MIPS64R6-NEXT: # %bb.2: # %entry
; MIPS64R6-NEXT: jrc $ra
;
; MIPS64R6O0-LABEL: AtomicLoadXor32:
; MIPS64R6O0: # %bb.0: # %entry
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor32)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB2_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6O0-NEXT: ll $2, 0($1)
; MIPS64R6O0-NEXT: xor $3, $2, $4
; MIPS64R6O0-NEXT: sc $3, 0($1)
; MIPS64R6O0-NEXT: beqzc $3, .LBB2_1
; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: AtomicLoadXor32:
; MM32: # %bb.0: # %entry
; MM32-NEXT: lui $2, %hi(_gp_disp)
; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MM32-NEXT: addu $2, $2, $25
; MM32-NEXT: lw $1, %got(x)($2)
; MM32-NEXT: $BB2_1: # %entry
; MM32-NEXT: # =>This Inner Loop Header: Depth=1
; MM32-NEXT: ll $2, 0($1)
; MM32-NEXT: xor $3, $2, $4
; MM32-NEXT: sc $3, 0($1)
; MM32-NEXT: beqzc $3, $BB2_1
; MM32-NEXT: # %bb.2: # %entry
; MM32-NEXT: jrc $ra
;
; O1-LABEL: AtomicLoadXor32:
; O1: # %bb.0: # %entry
; O1-NEXT: lui $2, %hi(_gp_disp)
; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
; O1-NEXT: addu $1, $2, $25
; O1-NEXT: lw $1, %got(x)($1)
; O1-NEXT: $BB2_1: # %entry
; O1-NEXT: # =>This Inner Loop Header: Depth=1
; O1-NEXT: ll $2, 0($1)
; O1-NEXT: xor $3, $2, $4
; O1-NEXT: sc $3, 0($1)
; O1-NEXT: beqz $3, $BB2_1
; O1-NEXT: nop
; O1-NEXT: # %bb.2: # %entry
; O1-NEXT: jr $ra
; O1-NEXT: nop
;
; O2-LABEL: AtomicLoadXor32:
; O2: # %bb.0: # %entry
; O2-NEXT: lui $2, %hi(_gp_disp)
; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
; O2-NEXT: addu $1, $2, $25
; O2-NEXT: lw $1, %got(x)($1)
; O2-NEXT: $BB2_1: # %entry
; O2-NEXT: # =>This Inner Loop Header: Depth=1
; O2-NEXT: ll $2, 0($1)
; O2-NEXT: xor $3, $2, $4
; O2-NEXT: sc $3, 0($1)
; O2-NEXT: beqz $3, $BB2_1
; O2-NEXT: nop
; O2-NEXT: # %bb.2: # %entry
; O2-NEXT: jr $ra
; O2-NEXT: nop
;
; O3-LABEL: AtomicLoadXor32:
; O3: # %bb.0: # %entry
; O3-NEXT: lui $2, %hi(_gp_disp)
; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
; O3-NEXT: addu $1, $2, $25
; O3-NEXT: lw $1, %got(x)($1)
; O3-NEXT: $BB2_1: # %entry
; O3-NEXT: # =>This Inner Loop Header: Depth=1
; O3-NEXT: ll $2, 0($1)
; O3-NEXT: xor $3, $2, $4
; O3-NEXT: sc $3, 0($1)
; O3-NEXT: beqz $3, $BB2_1
; O3-NEXT: nop
; O3-NEXT: # %bb.2: # %entry
; O3-NEXT: jr $ra
; O3-NEXT: nop
;
; MIPS32EB-LABEL: AtomicLoadXor32:
; MIPS32EB: # %bb.0: # %entry
; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32EB-NEXT: addu $1, $2, $25
; MIPS32EB-NEXT: lw $1, %got(x)($1)
; MIPS32EB-NEXT: $BB2_1: # %entry
; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32EB-NEXT: ll $2, 0($1)
; MIPS32EB-NEXT: xor $3, $2, $4
; MIPS32EB-NEXT: sc $3, 0($1)
; MIPS32EB-NEXT: beqz $3, $BB2_1
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: # %bb.2: # %entry
; MIPS32EB-NEXT: jr $ra
; MIPS32EB-NEXT: nop
entry:
%0 = atomicrmw xor i32* @x, i32 %incr monotonic
ret i32 %0
}
define i32 @AtomicLoadOr32(i32 signext %incr) nounwind {
; MIPS32-LABEL: AtomicLoadOr32:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-NEXT: addu $1, $2, $25
; MIPS32-NEXT: lw $1, %got(x)($1)
; MIPS32-NEXT: $BB3_1: # %entry
; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32-NEXT: ll $2, 0($1)
; MIPS32-NEXT: or $3, $2, $4
; MIPS32-NEXT: sc $3, 0($1)
; MIPS32-NEXT: beqz $3, $BB3_1
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %entry
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32O0-LABEL: AtomicLoadOr32:
; MIPS32O0: # %bb.0: # %entry
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: lw $1, %got(x)($1)
; MIPS32O0-NEXT: $BB3_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32O0-NEXT: ll $2, 0($1)
; MIPS32O0-NEXT: or $3, $2, $4
; MIPS32O0-NEXT: sc $3, 0($1)
; MIPS32O0-NEXT: beqz $3, $BB3_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
; MIPS32R2-LABEL: AtomicLoadOr32:
; MIPS32R2: # %bb.0: # %entry
; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R2-NEXT: addu $1, $2, $25
; MIPS32R2-NEXT: lw $1, %got(x)($1)
; MIPS32R2-NEXT: $BB3_1: # %entry
; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R2-NEXT: ll $2, 0($1)
; MIPS32R2-NEXT: or $3, $2, $4
; MIPS32R2-NEXT: sc $3, 0($1)
; MIPS32R2-NEXT: beqz $3, $BB3_1
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: # %bb.2: # %entry
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: nop
;
; MIPS32R6-LABEL: AtomicLoadOr32:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-NEXT: addu $1, $2, $25
; MIPS32R6-NEXT: lw $1, %got(x)($1)
; MIPS32R6-NEXT: $BB3_1: # %entry
; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6-NEXT: ll $2, 0($1)
; MIPS32R6-NEXT: or $3, $2, $4
; MIPS32R6-NEXT: sc $3, 0($1)
; MIPS32R6-NEXT: beqzc $3, $BB3_1
; MIPS32R6-NEXT: nop
; MIPS32R6-NEXT: # %bb.2: # %entry
; MIPS32R6-NEXT: jrc $ra
;
; MIPS32R6O0-LABEL: AtomicLoadOr32:
; MIPS32R6O0: # %bb.0: # %entry
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: lw $1, %got(x)($1)
; MIPS32R6O0-NEXT: $BB3_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6O0-NEXT: ll $2, 0($1)
; MIPS32R6O0-NEXT: or $3, $2, $4
; MIPS32R6O0-NEXT: sc $3, 0($1)
; MIPS32R6O0-NEXT: beqzc $3, $BB3_1
; MIPS32R6O0-NEXT: nop
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: AtomicLoadOr32:
; MIPS4: # %bb.0: # %entry
; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32)))
; MIPS4-NEXT: daddu $1, $1, $25
; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32)))
; MIPS4-NEXT: ld $1, %got_disp(x)($1)
; MIPS4-NEXT: .LBB3_1: # %entry
; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS4-NEXT: ll $2, 0($1)
; MIPS4-NEXT: or $3, $2, $4
; MIPS4-NEXT: sc $3, 0($1)
; MIPS4-NEXT: beqz $3, .LBB3_1
; MIPS4-NEXT: nop
; MIPS4-NEXT: # %bb.2: # %entry
; MIPS4-NEXT: jr $ra
; MIPS4-NEXT: nop
;
; MIPS64-LABEL: AtomicLoadOr32:
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32)))
; MIPS64-NEXT: daddu $1, $1, $25
; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32)))
; MIPS64-NEXT: ld $1, %got_disp(x)($1)
; MIPS64-NEXT: .LBB3_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64-NEXT: ll $2, 0($1)
; MIPS64-NEXT: or $3, $2, $4
; MIPS64-NEXT: sc $3, 0($1)
; MIPS64-NEXT: beqz $3, .LBB3_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: nop
;
; MIPS64R2-LABEL: AtomicLoadOr32:
; MIPS64R2: # %bb.0: # %entry
; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32)))
; MIPS64R2-NEXT: daddu $1, $1, $25
; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32)))
; MIPS64R2-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R2-NEXT: .LBB3_1: # %entry
; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R2-NEXT: ll $2, 0($1)
; MIPS64R2-NEXT: or $3, $2, $4
; MIPS64R2-NEXT: sc $3, 0($1)
; MIPS64R2-NEXT: beqz $3, .LBB3_1
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: # %bb.2: # %entry
; MIPS64R2-NEXT: jr $ra
; MIPS64R2-NEXT: nop
;
; MIPS64R6-LABEL: AtomicLoadOr32:
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32)))
; MIPS64R6-NEXT: daddu $1, $1, $25
; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32)))
; MIPS64R6-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6-NEXT: .LBB3_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6-NEXT: ll $2, 0($1)
; MIPS64R6-NEXT: or $3, $2, $4
; MIPS64R6-NEXT: sc $3, 0($1)
; MIPS64R6-NEXT: beqzc $3, .LBB3_1
; MIPS64R6-NEXT: nop
; MIPS64R6-NEXT: # %bb.2: # %entry
; MIPS64R6-NEXT: jrc $ra
;
; MIPS64R6O0-LABEL: AtomicLoadOr32:
; MIPS64R6O0: # %bb.0: # %entry
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB3_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6O0-NEXT: ll $2, 0($1)
; MIPS64R6O0-NEXT: or $3, $2, $4
; MIPS64R6O0-NEXT: sc $3, 0($1)
; MIPS64R6O0-NEXT: beqzc $3, .LBB3_1
; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: AtomicLoadOr32:
; MM32: # %bb.0: # %entry
; MM32-NEXT: lui $2, %hi(_gp_disp)
; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MM32-NEXT: addu $2, $2, $25
; MM32-NEXT: lw $1, %got(x)($2)
; MM32-NEXT: $BB3_1: # %entry
; MM32-NEXT: # =>This Inner Loop Header: Depth=1
; MM32-NEXT: ll $2, 0($1)
; MM32-NEXT: or $3, $2, $4
; MM32-NEXT: sc $3, 0($1)
; MM32-NEXT: beqzc $3, $BB3_1
; MM32-NEXT: # %bb.2: # %entry
; MM32-NEXT: jrc $ra
;
; O1-LABEL: AtomicLoadOr32:
; O1: # %bb.0: # %entry
; O1-NEXT: lui $2, %hi(_gp_disp)
; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
; O1-NEXT: addu $1, $2, $25
; O1-NEXT: lw $1, %got(x)($1)
; O1-NEXT: $BB3_1: # %entry
; O1-NEXT: # =>This Inner Loop Header: Depth=1
; O1-NEXT: ll $2, 0($1)
; O1-NEXT: or $3, $2, $4
; O1-NEXT: sc $3, 0($1)
; O1-NEXT: beqz $3, $BB3_1
; O1-NEXT: nop
; O1-NEXT: # %bb.2: # %entry
; O1-NEXT: jr $ra
; O1-NEXT: nop
;
; O2-LABEL: AtomicLoadOr32:
; O2: # %bb.0: # %entry
; O2-NEXT: lui $2, %hi(_gp_disp)
; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
; O2-NEXT: addu $1, $2, $25
; O2-NEXT: lw $1, %got(x)($1)
; O2-NEXT: $BB3_1: # %entry
; O2-NEXT: # =>This Inner Loop Header: Depth=1
; O2-NEXT: ll $2, 0($1)
; O2-NEXT: or $3, $2, $4
; O2-NEXT: sc $3, 0($1)
; O2-NEXT: beqz $3, $BB3_1
; O2-NEXT: nop
; O2-NEXT: # %bb.2: # %entry
; O2-NEXT: jr $ra
; O2-NEXT: nop
;
; O3-LABEL: AtomicLoadOr32:
; O3: # %bb.0: # %entry
; O3-NEXT: lui $2, %hi(_gp_disp)
; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
; O3-NEXT: addu $1, $2, $25
; O3-NEXT: lw $1, %got(x)($1)
; O3-NEXT: $BB3_1: # %entry
; O3-NEXT: # =>This Inner Loop Header: Depth=1
; O3-NEXT: ll $2, 0($1)
; O3-NEXT: or $3, $2, $4
; O3-NEXT: sc $3, 0($1)
; O3-NEXT: beqz $3, $BB3_1
; O3-NEXT: nop
; O3-NEXT: # %bb.2: # %entry
; O3-NEXT: jr $ra
; O3-NEXT: nop
;
; MIPS32EB-LABEL: AtomicLoadOr32:
; MIPS32EB: # %bb.0: # %entry
; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32EB-NEXT: addu $1, $2, $25
; MIPS32EB-NEXT: lw $1, %got(x)($1)
; MIPS32EB-NEXT: $BB3_1: # %entry
; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32EB-NEXT: ll $2, 0($1)
; MIPS32EB-NEXT: or $3, $2, $4
; MIPS32EB-NEXT: sc $3, 0($1)
; MIPS32EB-NEXT: beqz $3, $BB3_1
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: # %bb.2: # %entry
; MIPS32EB-NEXT: jr $ra
; MIPS32EB-NEXT: nop
entry:
%0 = atomicrmw or i32* @x, i32 %incr monotonic
ret i32 %0
}
define i32 @AtomicLoadAnd32(i32 signext %incr) nounwind {
; MIPS32-LABEL: AtomicLoadAnd32:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-NEXT: addu $1, $2, $25
; MIPS32-NEXT: lw $1, %got(x)($1)
; MIPS32-NEXT: $BB4_1: # %entry
; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32-NEXT: ll $2, 0($1)
; MIPS32-NEXT: and $3, $2, $4
; MIPS32-NEXT: sc $3, 0($1)
; MIPS32-NEXT: beqz $3, $BB4_1
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %entry
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32O0-LABEL: AtomicLoadAnd32:
; MIPS32O0: # %bb.0: # %entry
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: lw $1, %got(x)($1)
; MIPS32O0-NEXT: $BB4_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32O0-NEXT: ll $2, 0($1)
; MIPS32O0-NEXT: and $3, $2, $4
; MIPS32O0-NEXT: sc $3, 0($1)
; MIPS32O0-NEXT: beqz $3, $BB4_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
; MIPS32R2-LABEL: AtomicLoadAnd32:
; MIPS32R2: # %bb.0: # %entry
; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R2-NEXT: addu $1, $2, $25
; MIPS32R2-NEXT: lw $1, %got(x)($1)
; MIPS32R2-NEXT: $BB4_1: # %entry
; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R2-NEXT: ll $2, 0($1)
; MIPS32R2-NEXT: and $3, $2, $4
; MIPS32R2-NEXT: sc $3, 0($1)
; MIPS32R2-NEXT: beqz $3, $BB4_1
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: # %bb.2: # %entry
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: nop
;
; MIPS32R6-LABEL: AtomicLoadAnd32:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-NEXT: addu $1, $2, $25
; MIPS32R6-NEXT: lw $1, %got(x)($1)
; MIPS32R6-NEXT: $BB4_1: # %entry
; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6-NEXT: ll $2, 0($1)
; MIPS32R6-NEXT: and $3, $2, $4
; MIPS32R6-NEXT: sc $3, 0($1)
; MIPS32R6-NEXT: beqzc $3, $BB4_1
; MIPS32R6-NEXT: nop
; MIPS32R6-NEXT: # %bb.2: # %entry
; MIPS32R6-NEXT: jrc $ra
;
; MIPS32R6O0-LABEL: AtomicLoadAnd32:
; MIPS32R6O0: # %bb.0: # %entry
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: lw $1, %got(x)($1)
; MIPS32R6O0-NEXT: $BB4_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6O0-NEXT: ll $2, 0($1)
; MIPS32R6O0-NEXT: and $3, $2, $4
; MIPS32R6O0-NEXT: sc $3, 0($1)
; MIPS32R6O0-NEXT: beqzc $3, $BB4_1
; MIPS32R6O0-NEXT: nop
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: AtomicLoadAnd32:
; MIPS4: # %bb.0: # %entry
; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32)))
; MIPS4-NEXT: daddu $1, $1, $25
; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32)))
; MIPS4-NEXT: ld $1, %got_disp(x)($1)
; MIPS4-NEXT: .LBB4_1: # %entry
; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS4-NEXT: ll $2, 0($1)
; MIPS4-NEXT: and $3, $2, $4
; MIPS4-NEXT: sc $3, 0($1)
; MIPS4-NEXT: beqz $3, .LBB4_1
; MIPS4-NEXT: nop
; MIPS4-NEXT: # %bb.2: # %entry
; MIPS4-NEXT: jr $ra
; MIPS4-NEXT: nop
;
; MIPS64-LABEL: AtomicLoadAnd32:
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32)))
; MIPS64-NEXT: daddu $1, $1, $25
; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32)))
; MIPS64-NEXT: ld $1, %got_disp(x)($1)
; MIPS64-NEXT: .LBB4_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64-NEXT: ll $2, 0($1)
; MIPS64-NEXT: and $3, $2, $4
; MIPS64-NEXT: sc $3, 0($1)
; MIPS64-NEXT: beqz $3, .LBB4_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: nop
;
; MIPS64R2-LABEL: AtomicLoadAnd32:
; MIPS64R2: # %bb.0: # %entry
; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32)))
; MIPS64R2-NEXT: daddu $1, $1, $25
; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32)))
; MIPS64R2-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R2-NEXT: .LBB4_1: # %entry
; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R2-NEXT: ll $2, 0($1)
; MIPS64R2-NEXT: and $3, $2, $4
; MIPS64R2-NEXT: sc $3, 0($1)
; MIPS64R2-NEXT: beqz $3, .LBB4_1
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: # %bb.2: # %entry
; MIPS64R2-NEXT: jr $ra
; MIPS64R2-NEXT: nop
;
; MIPS64R6-LABEL: AtomicLoadAnd32:
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32)))
; MIPS64R6-NEXT: daddu $1, $1, $25
; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32)))
; MIPS64R6-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6-NEXT: .LBB4_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6-NEXT: ll $2, 0($1)
; MIPS64R6-NEXT: and $3, $2, $4
; MIPS64R6-NEXT: sc $3, 0($1)
; MIPS64R6-NEXT: beqzc $3, .LBB4_1
; MIPS64R6-NEXT: nop
; MIPS64R6-NEXT: # %bb.2: # %entry
; MIPS64R6-NEXT: jrc $ra
;
; MIPS64R6O0-LABEL: AtomicLoadAnd32:
; MIPS64R6O0: # %bb.0: # %entry
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB4_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6O0-NEXT: ll $2, 0($1)
; MIPS64R6O0-NEXT: and $3, $2, $4
; MIPS64R6O0-NEXT: sc $3, 0($1)
; MIPS64R6O0-NEXT: beqzc $3, .LBB4_1
; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: AtomicLoadAnd32:
; MM32: # %bb.0: # %entry
; MM32-NEXT: lui $2, %hi(_gp_disp)
; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MM32-NEXT: addu $2, $2, $25
; MM32-NEXT: lw $1, %got(x)($2)
; MM32-NEXT: $BB4_1: # %entry
; MM32-NEXT: # =>This Inner Loop Header: Depth=1
; MM32-NEXT: ll $2, 0($1)
; MM32-NEXT: and $3, $2, $4
; MM32-NEXT: sc $3, 0($1)
; MM32-NEXT: beqzc $3, $BB4_1
; MM32-NEXT: # %bb.2: # %entry
; MM32-NEXT: jrc $ra
;
; O1-LABEL: AtomicLoadAnd32:
; O1: # %bb.0: # %entry
; O1-NEXT: lui $2, %hi(_gp_disp)
; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
; O1-NEXT: addu $1, $2, $25
; O1-NEXT: lw $1, %got(x)($1)
; O1-NEXT: $BB4_1: # %entry
; O1-NEXT: # =>This Inner Loop Header: Depth=1
; O1-NEXT: ll $2, 0($1)
; O1-NEXT: and $3, $2, $4
; O1-NEXT: sc $3, 0($1)
; O1-NEXT: beqz $3, $BB4_1
; O1-NEXT: nop
; O1-NEXT: # %bb.2: # %entry
; O1-NEXT: jr $ra
; O1-NEXT: nop
;
; O2-LABEL: AtomicLoadAnd32:
; O2: # %bb.0: # %entry
; O2-NEXT: lui $2, %hi(_gp_disp)
; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
; O2-NEXT: addu $1, $2, $25
; O2-NEXT: lw $1, %got(x)($1)
; O2-NEXT: $BB4_1: # %entry
; O2-NEXT: # =>This Inner Loop Header: Depth=1
; O2-NEXT: ll $2, 0($1)
; O2-NEXT: and $3, $2, $4
; O2-NEXT: sc $3, 0($1)
; O2-NEXT: beqz $3, $BB4_1
; O2-NEXT: nop
; O2-NEXT: # %bb.2: # %entry
; O2-NEXT: jr $ra
; O2-NEXT: nop
;
; O3-LABEL: AtomicLoadAnd32:
; O3: # %bb.0: # %entry
; O3-NEXT: lui $2, %hi(_gp_disp)
; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
; O3-NEXT: addu $1, $2, $25
; O3-NEXT: lw $1, %got(x)($1)
; O3-NEXT: $BB4_1: # %entry
; O3-NEXT: # =>This Inner Loop Header: Depth=1
; O3-NEXT: ll $2, 0($1)
; O3-NEXT: and $3, $2, $4
; O3-NEXT: sc $3, 0($1)
; O3-NEXT: beqz $3, $BB4_1
; O3-NEXT: nop
; O3-NEXT: # %bb.2: # %entry
; O3-NEXT: jr $ra
; O3-NEXT: nop
;
; MIPS32EB-LABEL: AtomicLoadAnd32:
; MIPS32EB: # %bb.0: # %entry
; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32EB-NEXT: addu $1, $2, $25
; MIPS32EB-NEXT: lw $1, %got(x)($1)
; MIPS32EB-NEXT: $BB4_1: # %entry
; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32EB-NEXT: ll $2, 0($1)
; MIPS32EB-NEXT: and $3, $2, $4
; MIPS32EB-NEXT: sc $3, 0($1)
; MIPS32EB-NEXT: beqz $3, $BB4_1
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: # %bb.2: # %entry
; MIPS32EB-NEXT: jr $ra
; MIPS32EB-NEXT: nop
entry:
%0 = atomicrmw and i32* @x, i32 %incr monotonic
ret i32 %0
}
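; MIPS has no single nand instruction, so atomicrmw nand is expanded inside
; the same ll/sc loop as an 'and' followed by a 'nor' against $zero
; (i.e. $3 = ~($2 & $4)), as the checks below show.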
define i32 @AtomicLoadNand32(i32 signext %incr) nounwind {
; MIPS32-LABEL: AtomicLoadNand32:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-NEXT: addu $1, $2, $25
; MIPS32-NEXT: lw $1, %got(x)($1)
; MIPS32-NEXT: $BB5_1: # %entry
; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32-NEXT: ll $2, 0($1)
; MIPS32-NEXT: and $3, $2, $4
; MIPS32-NEXT: nor $3, $zero, $3
; MIPS32-NEXT: sc $3, 0($1)
; MIPS32-NEXT: beqz $3, $BB5_1
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %entry
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32O0-LABEL: AtomicLoadNand32:
; MIPS32O0: # %bb.0: # %entry
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: lw $1, %got(x)($1)
; MIPS32O0-NEXT: $BB5_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32O0-NEXT: ll $2, 0($1)
; MIPS32O0-NEXT: and $3, $2, $4
; MIPS32O0-NEXT: nor $3, $zero, $3
; MIPS32O0-NEXT: sc $3, 0($1)
; MIPS32O0-NEXT: beqz $3, $BB5_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
; MIPS32R2-LABEL: AtomicLoadNand32:
; MIPS32R2: # %bb.0: # %entry
; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R2-NEXT: addu $1, $2, $25
; MIPS32R2-NEXT: lw $1, %got(x)($1)
; MIPS32R2-NEXT: $BB5_1: # %entry
; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R2-NEXT: ll $2, 0($1)
; MIPS32R2-NEXT: and $3, $2, $4
; MIPS32R2-NEXT: nor $3, $zero, $3
; MIPS32R2-NEXT: sc $3, 0($1)
; MIPS32R2-NEXT: beqz $3, $BB5_1
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: # %bb.2: # %entry
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: nop
;
; MIPS32R6-LABEL: AtomicLoadNand32:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-NEXT: addu $1, $2, $25
; MIPS32R6-NEXT: lw $1, %got(x)($1)
; MIPS32R6-NEXT: $BB5_1: # %entry
; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6-NEXT: ll $2, 0($1)
; MIPS32R6-NEXT: and $3, $2, $4
; MIPS32R6-NEXT: nor $3, $zero, $3
; MIPS32R6-NEXT: sc $3, 0($1)
; MIPS32R6-NEXT: beqzc $3, $BB5_1
; MIPS32R6-NEXT: nop
; MIPS32R6-NEXT: # %bb.2: # %entry
; MIPS32R6-NEXT: jrc $ra
;
; MIPS32R6O0-LABEL: AtomicLoadNand32:
; MIPS32R6O0: # %bb.0: # %entry
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: lw $1, %got(x)($1)
; MIPS32R6O0-NEXT: $BB5_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6O0-NEXT: ll $2, 0($1)
; MIPS32R6O0-NEXT: and $3, $2, $4
; MIPS32R6O0-NEXT: nor $3, $zero, $3
; MIPS32R6O0-NEXT: sc $3, 0($1)
; MIPS32R6O0-NEXT: beqzc $3, $BB5_1
; MIPS32R6O0-NEXT: nop
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: AtomicLoadNand32:
; MIPS4: # %bb.0: # %entry
; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand32)))
; MIPS4-NEXT: daddu $1, $1, $25
; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32)))
; MIPS4-NEXT: ld $1, %got_disp(x)($1)
; MIPS4-NEXT: .LBB5_1: # %entry
; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS4-NEXT: ll $2, 0($1)
; MIPS4-NEXT: and $3, $2, $4
; MIPS4-NEXT: nor $3, $zero, $3
; MIPS4-NEXT: sc $3, 0($1)
; MIPS4-NEXT: beqz $3, .LBB5_1
; MIPS4-NEXT: nop
; MIPS4-NEXT: # %bb.2: # %entry
; MIPS4-NEXT: jr $ra
; MIPS4-NEXT: nop
;
; MIPS64-LABEL: AtomicLoadNand32:
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand32)))
; MIPS64-NEXT: daddu $1, $1, $25
; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32)))
; MIPS64-NEXT: ld $1, %got_disp(x)($1)
; MIPS64-NEXT: .LBB5_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64-NEXT: ll $2, 0($1)
; MIPS64-NEXT: and $3, $2, $4
; MIPS64-NEXT: nor $3, $zero, $3
; MIPS64-NEXT: sc $3, 0($1)
; MIPS64-NEXT: beqz $3, .LBB5_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: nop
;
; MIPS64R2-LABEL: AtomicLoadNand32:
; MIPS64R2: # %bb.0: # %entry
; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand32)))
; MIPS64R2-NEXT: daddu $1, $1, $25
; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32)))
; MIPS64R2-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R2-NEXT: .LBB5_1: # %entry
; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R2-NEXT: ll $2, 0($1)
; MIPS64R2-NEXT: and $3, $2, $4
; MIPS64R2-NEXT: nor $3, $zero, $3
; MIPS64R2-NEXT: sc $3, 0($1)
; MIPS64R2-NEXT: beqz $3, .LBB5_1
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: # %bb.2: # %entry
; MIPS64R2-NEXT: jr $ra
; MIPS64R2-NEXT: nop
;
; MIPS64R6-LABEL: AtomicLoadNand32:
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand32)))
; MIPS64R6-NEXT: daddu $1, $1, $25
; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32)))
; MIPS64R6-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6-NEXT: .LBB5_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6-NEXT: ll $2, 0($1)
; MIPS64R6-NEXT: and $3, $2, $4
; MIPS64R6-NEXT: nor $3, $zero, $3
; MIPS64R6-NEXT: sc $3, 0($1)
; MIPS64R6-NEXT: beqzc $3, .LBB5_1
; MIPS64R6-NEXT: nop
; MIPS64R6-NEXT: # %bb.2: # %entry
; MIPS64R6-NEXT: jrc $ra
;
; MIPS64R6O0-LABEL: AtomicLoadNand32:
; MIPS64R6O0: # %bb.0: # %entry
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand32)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB5_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6O0-NEXT: ll $2, 0($1)
; MIPS64R6O0-NEXT: and $3, $2, $4
; MIPS64R6O0-NEXT: nor $3, $zero, $3
; MIPS64R6O0-NEXT: sc $3, 0($1)
; MIPS64R6O0-NEXT: beqzc $3, .LBB5_1
; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: AtomicLoadNand32:
; MM32: # %bb.0: # %entry
; MM32-NEXT: lui $2, %hi(_gp_disp)
; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MM32-NEXT: addu $2, $2, $25
; MM32-NEXT: lw $1, %got(x)($2)
; MM32-NEXT: $BB5_1: # %entry
; MM32-NEXT: # =>This Inner Loop Header: Depth=1
; MM32-NEXT: ll $2, 0($1)
; MM32-NEXT: and $3, $2, $4
; MM32-NEXT: nor $3, $zero, $3
; MM32-NEXT: sc $3, 0($1)
; MM32-NEXT: beqzc $3, $BB5_1
; MM32-NEXT: # %bb.2: # %entry
; MM32-NEXT: jrc $ra
;
; O1-LABEL: AtomicLoadNand32:
; O1: # %bb.0: # %entry
; O1-NEXT: lui $2, %hi(_gp_disp)
; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
; O1-NEXT: addu $1, $2, $25
; O1-NEXT: lw $1, %got(x)($1)
; O1-NEXT: $BB5_1: # %entry
; O1-NEXT: # =>This Inner Loop Header: Depth=1
; O1-NEXT: ll $2, 0($1)
; O1-NEXT: and $3, $2, $4
; O1-NEXT: nor $3, $zero, $3
; O1-NEXT: sc $3, 0($1)
; O1-NEXT: beqz $3, $BB5_1
; O1-NEXT: nop
; O1-NEXT: # %bb.2: # %entry
; O1-NEXT: jr $ra
; O1-NEXT: nop
;
; O2-LABEL: AtomicLoadNand32:
; O2: # %bb.0: # %entry
; O2-NEXT: lui $2, %hi(_gp_disp)
; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
; O2-NEXT: addu $1, $2, $25
; O2-NEXT: lw $1, %got(x)($1)
; O2-NEXT: $BB5_1: # %entry
; O2-NEXT: # =>This Inner Loop Header: Depth=1
; O2-NEXT: ll $2, 0($1)
; O2-NEXT: and $3, $2, $4
; O2-NEXT: nor $3, $zero, $3
; O2-NEXT: sc $3, 0($1)
; O2-NEXT: beqz $3, $BB5_1
; O2-NEXT: nop
; O2-NEXT: # %bb.2: # %entry
; O2-NEXT: jr $ra
; O2-NEXT: nop
;
; O3-LABEL: AtomicLoadNand32:
; O3: # %bb.0: # %entry
; O3-NEXT: lui $2, %hi(_gp_disp)
; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
; O3-NEXT: addu $1, $2, $25
; O3-NEXT: lw $1, %got(x)($1)
; O3-NEXT: $BB5_1: # %entry
; O3-NEXT: # =>This Inner Loop Header: Depth=1
; O3-NEXT: ll $2, 0($1)
; O3-NEXT: and $3, $2, $4
; O3-NEXT: nor $3, $zero, $3
; O3-NEXT: sc $3, 0($1)
; O3-NEXT: beqz $3, $BB5_1
; O3-NEXT: nop
; O3-NEXT: # %bb.2: # %entry
; O3-NEXT: jr $ra
; O3-NEXT: nop
;
; MIPS32EB-LABEL: AtomicLoadNand32:
; MIPS32EB: # %bb.0: # %entry
; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32EB-NEXT: addu $1, $2, $25
; MIPS32EB-NEXT: lw $1, %got(x)($1)
; MIPS32EB-NEXT: $BB5_1: # %entry
; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32EB-NEXT: ll $2, 0($1)
; MIPS32EB-NEXT: and $3, $2, $4
; MIPS32EB-NEXT: nor $3, $zero, $3
; MIPS32EB-NEXT: sc $3, 0($1)
; MIPS32EB-NEXT: beqz $3, $BB5_1
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: # %bb.2: # %entry
; MIPS32EB-NEXT: jr $ra
; MIPS32EB-NEXT: nop
entry:
%0 = atomicrmw nand i32* @x, i32 %incr monotonic
ret i32 %0
}
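; atomicrmw xchg needs no ALU op inside the loop: the ll result is the return
; value, and the new value is simply copied into the sc operand. A sketch,
; distilled from the check lines below:
;   $BBn_1:
;     ll   $2, 0($1)   # old value, becomes the result
;     move $3, $4      # copy newval (microMIPS encodes this as or $3, $4, $zero)
;     sc   $3, 0($1)   # attempt the store
;     beqz $3, $BBn_1  # retry on failure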
define i32 @AtomicSwap32(i32 signext %newval) nounwind {
; MIPS32-LABEL: AtomicSwap32:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-NEXT: addiu $sp, $sp, -8
; MIPS32-NEXT: addu $1, $2, $25
; MIPS32-NEXT: sw $4, 4($sp)
; MIPS32-NEXT: lw $1, %got(x)($1)
; MIPS32-NEXT: $BB6_1: # %entry
; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32-NEXT: ll $2, 0($1)
; MIPS32-NEXT: move $3, $4
; MIPS32-NEXT: sc $3, 0($1)
; MIPS32-NEXT: beqz $3, $BB6_1
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %entry
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: addiu $sp, $sp, 8
;
; MIPS32O0-LABEL: AtomicSwap32:
; MIPS32O0: # %bb.0: # %entry
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addiu $sp, $sp, -8
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: sw $4, 4($sp)
; MIPS32O0-NEXT: lw $2, 4($sp)
; MIPS32O0-NEXT: lw $1, %got(x)($1)
; MIPS32O0-NEXT: $BB6_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32O0-NEXT: ll $3, 0($1)
; MIPS32O0-NEXT: move $4, $2
; MIPS32O0-NEXT: sc $4, 0($1)
; MIPS32O0-NEXT: beqz $4, $BB6_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: move $2, $3
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
; MIPS32R2-LABEL: AtomicSwap32:
; MIPS32R2: # %bb.0: # %entry
; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R2-NEXT: addiu $sp, $sp, -8
; MIPS32R2-NEXT: addu $1, $2, $25
; MIPS32R2-NEXT: sw $4, 4($sp)
; MIPS32R2-NEXT: lw $1, %got(x)($1)
; MIPS32R2-NEXT: $BB6_1: # %entry
; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R2-NEXT: ll $2, 0($1)
; MIPS32R2-NEXT: move $3, $4
; MIPS32R2-NEXT: sc $3, 0($1)
; MIPS32R2-NEXT: beqz $3, $BB6_1
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: # %bb.2: # %entry
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: addiu $sp, $sp, 8
;
; MIPS32R6-LABEL: AtomicSwap32:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-NEXT: addiu $sp, $sp, -8
; MIPS32R6-NEXT: addu $1, $2, $25
; MIPS32R6-NEXT: sw $4, 4($sp)
; MIPS32R6-NEXT: lw $1, %got(x)($1)
; MIPS32R6-NEXT: $BB6_1: # %entry
; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6-NEXT: ll $2, 0($1)
; MIPS32R6-NEXT: move $3, $4
; MIPS32R6-NEXT: sc $3, 0($1)
; MIPS32R6-NEXT: beqzc $3, $BB6_1
; MIPS32R6-NEXT: nop
; MIPS32R6-NEXT: # %bb.2: # %entry
; MIPS32R6-NEXT: jr $ra
; MIPS32R6-NEXT: addiu $sp, $sp, 8
;
; MIPS32R6O0-LABEL: AtomicSwap32:
; MIPS32R6O0: # %bb.0: # %entry
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: sw $4, 4($sp)
; MIPS32R6O0-NEXT: lw $2, 4($sp)
; MIPS32R6O0-NEXT: lw $1, %got(x)($1)
; MIPS32R6O0-NEXT: $BB6_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6O0-NEXT: ll $3, 0($1)
; MIPS32R6O0-NEXT: move $4, $2
; MIPS32R6O0-NEXT: sc $4, 0($1)
; MIPS32R6O0-NEXT: beqzc $4, $BB6_1
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: move $2, $3
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: AtomicSwap32:
; MIPS4: # %bb.0: # %entry
; MIPS4-NEXT: daddiu $sp, $sp, -16
; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32)))
; MIPS4-NEXT: daddu $1, $1, $25
; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32)))
; MIPS4-NEXT: sw $4, 12($sp)
; MIPS4-NEXT: ld $1, %got_disp(x)($1)
; MIPS4-NEXT: .LBB6_1: # %entry
; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS4-NEXT: ll $2, 0($1)
; MIPS4-NEXT: move $3, $4
; MIPS4-NEXT: sc $3, 0($1)
; MIPS4-NEXT: beqz $3, .LBB6_1
; MIPS4-NEXT: nop
; MIPS4-NEXT: # %bb.2: # %entry
; MIPS4-NEXT: jr $ra
; MIPS4-NEXT: daddiu $sp, $sp, 16
;
; MIPS64-LABEL: AtomicSwap32:
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: daddiu $sp, $sp, -16
; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32)))
; MIPS64-NEXT: daddu $1, $1, $25
; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32)))
; MIPS64-NEXT: sw $4, 12($sp)
; MIPS64-NEXT: ld $1, %got_disp(x)($1)
; MIPS64-NEXT: .LBB6_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64-NEXT: ll $2, 0($1)
; MIPS64-NEXT: move $3, $4
; MIPS64-NEXT: sc $3, 0($1)
; MIPS64-NEXT: beqz $3, .LBB6_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: daddiu $sp, $sp, 16
;
; MIPS64R2-LABEL: AtomicSwap32:
; MIPS64R2: # %bb.0: # %entry
; MIPS64R2-NEXT: daddiu $sp, $sp, -16
; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32)))
; MIPS64R2-NEXT: daddu $1, $1, $25
; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32)))
; MIPS64R2-NEXT: sw $4, 12($sp)
; MIPS64R2-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R2-NEXT: .LBB6_1: # %entry
; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R2-NEXT: ll $2, 0($1)
; MIPS64R2-NEXT: move $3, $4
; MIPS64R2-NEXT: sc $3, 0($1)
; MIPS64R2-NEXT: beqz $3, .LBB6_1
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: # %bb.2: # %entry
; MIPS64R2-NEXT: jr $ra
; MIPS64R2-NEXT: daddiu $sp, $sp, 16
;
; MIPS64R6-LABEL: AtomicSwap32:
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: daddiu $sp, $sp, -16
; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32)))
; MIPS64R6-NEXT: daddu $1, $1, $25
; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32)))
; MIPS64R6-NEXT: sw $4, 12($sp)
; MIPS64R6-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6-NEXT: .LBB6_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6-NEXT: ll $2, 0($1)
; MIPS64R6-NEXT: move $3, $4
; MIPS64R6-NEXT: sc $3, 0($1)
; MIPS64R6-NEXT: beqzc $3, .LBB6_1
; MIPS64R6-NEXT: nop
; MIPS64R6-NEXT: # %bb.2: # %entry
; MIPS64R6-NEXT: jr $ra
; MIPS64R6-NEXT: daddiu $sp, $sp, 16
;
; MIPS64R6O0-LABEL: AtomicSwap32:
; MIPS64R6O0: # %bb.0: # %entry
; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
; MIPS64R6O0-NEXT: sw $4, 12($sp)
; MIPS64R6O0-NEXT: lw $2, 12($sp)
; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB6_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6O0-NEXT: ll $3, 0($1)
; MIPS64R6O0-NEXT: move $4, $2
; MIPS64R6O0-NEXT: sc $4, 0($1)
; MIPS64R6O0-NEXT: beqzc $4, .LBB6_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: move $2, $3
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: AtomicSwap32:
; MM32: # %bb.0: # %entry
; MM32-NEXT: lui $2, %hi(_gp_disp)
; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MM32-NEXT: addiu $sp, $sp, -8
; MM32-NEXT: addu $2, $2, $25
; MM32-NEXT: sw $4, 4($sp)
; MM32-NEXT: lw $1, %got(x)($2)
; MM32-NEXT: $BB6_1: # %entry
; MM32-NEXT: # =>This Inner Loop Header: Depth=1
; MM32-NEXT: ll $2, 0($1)
; MM32-NEXT: or $3, $4, $zero
; MM32-NEXT: sc $3, 0($1)
; MM32-NEXT: beqzc $3, $BB6_1
; MM32-NEXT: # %bb.2: # %entry
; MM32-NEXT: addiusp 8
; MM32-NEXT: jrc $ra
;
; O1-LABEL: AtomicSwap32:
; O1: # %bb.0: # %entry
; O1-NEXT: lui $2, %hi(_gp_disp)
; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
; O1-NEXT: addiu $sp, $sp, -8
; O1-NEXT: addu $1, $2, $25
; O1-NEXT: sw $4, 4($sp)
; O1-NEXT: lw $1, %got(x)($1)
; O1-NEXT: $BB6_1: # %entry
; O1-NEXT: # =>This Inner Loop Header: Depth=1
; O1-NEXT: ll $2, 0($1)
; O1-NEXT: move $3, $4
; O1-NEXT: sc $3, 0($1)
; O1-NEXT: beqz $3, $BB6_1
; O1-NEXT: nop
; O1-NEXT: # %bb.2: # %entry
; O1-NEXT: jr $ra
; O1-NEXT: addiu $sp, $sp, 8
;
; O2-LABEL: AtomicSwap32:
; O2: # %bb.0: # %entry
; O2-NEXT: lui $2, %hi(_gp_disp)
; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
; O2-NEXT: addiu $sp, $sp, -8
; O2-NEXT: addu $1, $2, $25
; O2-NEXT: sw $4, 4($sp)
; O2-NEXT: lw $1, %got(x)($1)
; O2-NEXT: $BB6_1: # %entry
; O2-NEXT: # =>This Inner Loop Header: Depth=1
; O2-NEXT: ll $2, 0($1)
; O2-NEXT: move $3, $4
; O2-NEXT: sc $3, 0($1)
; O2-NEXT: beqz $3, $BB6_1
; O2-NEXT: nop
; O2-NEXT: # %bb.2: # %entry
; O2-NEXT: jr $ra
; O2-NEXT: addiu $sp, $sp, 8
;
; O3-LABEL: AtomicSwap32:
; O3: # %bb.0: # %entry
; O3-NEXT: lui $2, %hi(_gp_disp)
; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
; O3-NEXT: addiu $sp, $sp, -8
; O3-NEXT: addu $1, $2, $25
; O3-NEXT: sw $4, 4($sp)
; O3-NEXT: lw $1, %got(x)($1)
; O3-NEXT: $BB6_1: # %entry
; O3-NEXT: # =>This Inner Loop Header: Depth=1
; O3-NEXT: ll $2, 0($1)
; O3-NEXT: move $3, $4
; O3-NEXT: sc $3, 0($1)
; O3-NEXT: beqz $3, $BB6_1
; O3-NEXT: nop
; O3-NEXT: # %bb.2: # %entry
; O3-NEXT: jr $ra
; O3-NEXT: addiu $sp, $sp, 8
;
; MIPS32EB-LABEL: AtomicSwap32:
; MIPS32EB: # %bb.0: # %entry
; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32EB-NEXT: addiu $sp, $sp, -8
; MIPS32EB-NEXT: addu $1, $2, $25
; MIPS32EB-NEXT: sw $4, 4($sp)
; MIPS32EB-NEXT: lw $1, %got(x)($1)
; MIPS32EB-NEXT: $BB6_1: # %entry
; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32EB-NEXT: ll $2, 0($1)
; MIPS32EB-NEXT: move $3, $4
; MIPS32EB-NEXT: sc $3, 0($1)
; MIPS32EB-NEXT: beqz $3, $BB6_1
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: # %bb.2: # %entry
; MIPS32EB-NEXT: jr $ra
; MIPS32EB-NEXT: addiu $sp, $sp, 8
entry:
%newval.addr = alloca i32, align 4
store i32 %newval, i32* %newval.addr, align 4
%tmp = load i32, i32* %newval.addr, align 4
%0 = atomicrmw xchg i32* @x, i32 %tmp monotonic
ret i32 %0
}
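; cmpxchg adds an early exit to the ll/sc loop: if the loaded value does not
; match the expected operand, the loop is left without attempting the store.
; A sketch, distilled from the check lines below:
;   $BBn_1:
;     ll   $2, 0($1)        # load-linked the current value
;     bne  $2, $4, $BBn_3   # mismatch: exit, returning the loaded value
;     move $3, $5           # copy newval
;     sc   $3, 0($1)        # attempt the store
;     beqz $3, $BBn_1       # reservation lost: reload and recompare
;   $BBn_3:
; At -O0 the success bit of the { i32, i1 } result is also materialized
; (xor + sltiu) even though only the value half is returned here.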
define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind {
; MIPS32-LABEL: AtomicCmpSwap32:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-NEXT: addiu $sp, $sp, -8
; MIPS32-NEXT: addu $1, $2, $25
; MIPS32-NEXT: sw $5, 4($sp)
; MIPS32-NEXT: lw $1, %got(x)($1)
; MIPS32-NEXT: $BB7_1: # %entry
; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32-NEXT: ll $2, 0($1)
; MIPS32-NEXT: bne $2, $4, $BB7_3
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %entry
; MIPS32-NEXT: # in Loop: Header=BB7_1 Depth=1
; MIPS32-NEXT: move $3, $5
; MIPS32-NEXT: sc $3, 0($1)
; MIPS32-NEXT: beqz $3, $BB7_1
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB7_3: # %entry
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: addiu $sp, $sp, 8
;
; MIPS32O0-LABEL: AtomicCmpSwap32:
; MIPS32O0: # %bb.0: # %entry
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addiu $sp, $sp, -8
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: sw $5, 4($sp)
; MIPS32O0-NEXT: lw $2, 4($sp)
; MIPS32O0-NEXT: lw $1, %got(x)($1)
; MIPS32O0-NEXT: move $3, $4
; MIPS32O0-NEXT: $BB7_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32O0-NEXT: ll $5, 0($1)
; MIPS32O0-NEXT: bne $5, $3, $BB7_3
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: # in Loop: Header=BB7_1 Depth=1
; MIPS32O0-NEXT: move $6, $2
; MIPS32O0-NEXT: sc $6, 0($1)
; MIPS32O0-NEXT: beqz $6, $BB7_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: $BB7_3: # %entry
; MIPS32O0-NEXT: xor $1, $5, $4
; MIPS32O0-NEXT: sltiu $1, $1, 1
; MIPS32O0-NEXT: move $2, $5
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
; MIPS32R2-LABEL: AtomicCmpSwap32:
; MIPS32R2: # %bb.0: # %entry
; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R2-NEXT: addiu $sp, $sp, -8
; MIPS32R2-NEXT: addu $1, $2, $25
; MIPS32R2-NEXT: sw $5, 4($sp)
; MIPS32R2-NEXT: lw $1, %got(x)($1)
; MIPS32R2-NEXT: $BB7_1: # %entry
; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R2-NEXT: ll $2, 0($1)
; MIPS32R2-NEXT: bne $2, $4, $BB7_3
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: # %bb.2: # %entry
; MIPS32R2-NEXT: # in Loop: Header=BB7_1 Depth=1
; MIPS32R2-NEXT: move $3, $5
; MIPS32R2-NEXT: sc $3, 0($1)
; MIPS32R2-NEXT: beqz $3, $BB7_1
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: $BB7_3: # %entry
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: addiu $sp, $sp, 8
;
; MIPS32R6-LABEL: AtomicCmpSwap32:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-NEXT: addiu $sp, $sp, -8
; MIPS32R6-NEXT: addu $1, $2, $25
; MIPS32R6-NEXT: sw $5, 4($sp)
; MIPS32R6-NEXT: lw $1, %got(x)($1)
; MIPS32R6-NEXT: $BB7_1: # %entry
; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6-NEXT: ll $2, 0($1)
; MIPS32R6-NEXT: bnec $2, $4, $BB7_3
; MIPS32R6-NEXT: # %bb.2: # %entry
; MIPS32R6-NEXT: # in Loop: Header=BB7_1 Depth=1
; MIPS32R6-NEXT: move $3, $5
; MIPS32R6-NEXT: sc $3, 0($1)
; MIPS32R6-NEXT: beqzc $3, $BB7_1
; MIPS32R6-NEXT: nop
; MIPS32R6-NEXT: $BB7_3: # %entry
; MIPS32R6-NEXT: jr $ra
; MIPS32R6-NEXT: addiu $sp, $sp, 8
;
; MIPS32R6O0-LABEL: AtomicCmpSwap32:
; MIPS32R6O0: # %bb.0: # %entry
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: sw $5, 4($sp)
; MIPS32R6O0-NEXT: lw $2, 4($sp)
; MIPS32R6O0-NEXT: lw $1, %got(x)($1)
; MIPS32R6O0-NEXT: $BB7_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6O0-NEXT: ll $3, 0($1)
; MIPS32R6O0-NEXT: bnec $3, $4, $BB7_3
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1
; MIPS32R6O0-NEXT: move $5, $2
; MIPS32R6O0-NEXT: sc $5, 0($1)
; MIPS32R6O0-NEXT: beqzc $5, $BB7_1
; MIPS32R6O0-NEXT: $BB7_3: # %entry
; MIPS32R6O0-NEXT: move $2, $3
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: AtomicCmpSwap32:
; MIPS4: # %bb.0: # %entry
; MIPS4-NEXT: daddiu $sp, $sp, -16
; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap32)))
; MIPS4-NEXT: daddu $1, $1, $25
; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32)))
; MIPS4-NEXT: sw $5, 12($sp)
; MIPS4-NEXT: ld $1, %got_disp(x)($1)
; MIPS4-NEXT: .LBB7_1: # %entry
; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS4-NEXT: ll $2, 0($1)
; MIPS4-NEXT: bne $2, $4, .LBB7_3
; MIPS4-NEXT: nop
; MIPS4-NEXT: # %bb.2: # %entry
; MIPS4-NEXT: # in Loop: Header=BB7_1 Depth=1
; MIPS4-NEXT: move $3, $5
; MIPS4-NEXT: sc $3, 0($1)
; MIPS4-NEXT: beqz $3, .LBB7_1
; MIPS4-NEXT: nop
; MIPS4-NEXT: .LBB7_3: # %entry
; MIPS4-NEXT: jr $ra
; MIPS4-NEXT: daddiu $sp, $sp, 16
;
; MIPS64-LABEL: AtomicCmpSwap32:
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: daddiu $sp, $sp, -16
; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap32)))
; MIPS64-NEXT: daddu $1, $1, $25
; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32)))
; MIPS64-NEXT: sw $5, 12($sp)
; MIPS64-NEXT: ld $1, %got_disp(x)($1)
; MIPS64-NEXT: .LBB7_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64-NEXT: ll $2, 0($1)
; MIPS64-NEXT: bne $2, $4, .LBB7_3
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
; MIPS64-NEXT: # in Loop: Header=BB7_1 Depth=1
; MIPS64-NEXT: move $3, $5
; MIPS64-NEXT: sc $3, 0($1)
; MIPS64-NEXT: beqz $3, .LBB7_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: .LBB7_3: # %entry
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: daddiu $sp, $sp, 16
;
; MIPS64R2-LABEL: AtomicCmpSwap32:
; MIPS64R2: # %bb.0: # %entry
; MIPS64R2-NEXT: daddiu $sp, $sp, -16
; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap32)))
; MIPS64R2-NEXT: daddu $1, $1, $25
; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32)))
; MIPS64R2-NEXT: sw $5, 12($sp)
; MIPS64R2-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R2-NEXT: .LBB7_1: # %entry
; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R2-NEXT: ll $2, 0($1)
; MIPS64R2-NEXT: bne $2, $4, .LBB7_3
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: # %bb.2: # %entry
; MIPS64R2-NEXT: # in Loop: Header=BB7_1 Depth=1
; MIPS64R2-NEXT: move $3, $5
; MIPS64R2-NEXT: sc $3, 0($1)
; MIPS64R2-NEXT: beqz $3, .LBB7_1
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: .LBB7_3: # %entry
; MIPS64R2-NEXT: jr $ra
; MIPS64R2-NEXT: daddiu $sp, $sp, 16
;
; MIPS64R6-LABEL: AtomicCmpSwap32:
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: daddiu $sp, $sp, -16
; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap32)))
; MIPS64R6-NEXT: daddu $1, $1, $25
; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32)))
; MIPS64R6-NEXT: sw $5, 12($sp)
; MIPS64R6-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6-NEXT: .LBB7_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6-NEXT: ll $2, 0($1)
; MIPS64R6-NEXT: bnec $2, $4, .LBB7_3
; MIPS64R6-NEXT: # %bb.2: # %entry
; MIPS64R6-NEXT: # in Loop: Header=BB7_1 Depth=1
; MIPS64R6-NEXT: move $3, $5
; MIPS64R6-NEXT: sc $3, 0($1)
; MIPS64R6-NEXT: beqzc $3, .LBB7_1
; MIPS64R6-NEXT: nop
; MIPS64R6-NEXT: .LBB7_3: # %entry
; MIPS64R6-NEXT: jr $ra
; MIPS64R6-NEXT: daddiu $sp, $sp, 16
;
; MIPS64R6O0-LABEL: AtomicCmpSwap32:
; MIPS64R6O0: # %bb.0: # %entry
; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap32)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
; MIPS64R6O0-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64R6O0-NEXT: sw $5, 12($sp)
; MIPS64R6O0-NEXT: lw $2, 12($sp)
; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB7_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6O0-NEXT: ll $3, 0($1)
; MIPS64R6O0-NEXT: bnec $3, $4, .LBB7_3
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1
; MIPS64R6O0-NEXT: move $5, $2
; MIPS64R6O0-NEXT: sc $5, 0($1)
; MIPS64R6O0-NEXT: beqzc $5, .LBB7_1
; MIPS64R6O0-NEXT: .LBB7_3: # %entry
; MIPS64R6O0-NEXT: move $2, $3
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: AtomicCmpSwap32:
; MM32: # %bb.0: # %entry
; MM32-NEXT: lui $2, %hi(_gp_disp)
; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MM32-NEXT: addiu $sp, $sp, -8
; MM32-NEXT: addu $2, $2, $25
; MM32-NEXT: sw $5, 4($sp)
; MM32-NEXT: lw $1, %got(x)($2)
; MM32-NEXT: $BB7_1: # %entry
; MM32-NEXT: # =>This Inner Loop Header: Depth=1
; MM32-NEXT: ll $2, 0($1)
; MM32-NEXT: bne $2, $4, $BB7_3
; MM32-NEXT: nop
; MM32-NEXT: # %bb.2: # %entry
; MM32-NEXT: # in Loop: Header=BB7_1 Depth=1
; MM32-NEXT: move $3, $5
; MM32-NEXT: sc $3, 0($1)
; MM32-NEXT: beqzc $3, $BB7_1
; MM32-NEXT: $BB7_3: # %entry
; MM32-NEXT: addiusp 8
; MM32-NEXT: jrc $ra
;
; O1-LABEL: AtomicCmpSwap32:
; O1: # %bb.0: # %entry
; O1-NEXT: lui $2, %hi(_gp_disp)
; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
; O1-NEXT: addiu $sp, $sp, -8
; O1-NEXT: addu $1, $2, $25
; O1-NEXT: sw $5, 4($sp)
; O1-NEXT: lw $1, %got(x)($1)
; O1-NEXT: $BB7_1: # %entry
; O1-NEXT: # =>This Inner Loop Header: Depth=1
; O1-NEXT: ll $2, 0($1)
; O1-NEXT: bne $2, $4, $BB7_3
; O1-NEXT: nop
; O1-NEXT: # %bb.2: # %entry
; O1-NEXT: # in Loop: Header=BB7_1 Depth=1
; O1-NEXT: move $3, $5
; O1-NEXT: sc $3, 0($1)
; O1-NEXT: beqz $3, $BB7_1
; O1-NEXT: nop
; O1-NEXT: $BB7_3: # %entry
; O1-NEXT: jr $ra
; O1-NEXT: addiu $sp, $sp, 8
;
; O2-LABEL: AtomicCmpSwap32:
; O2: # %bb.0: # %entry
; O2-NEXT: lui $2, %hi(_gp_disp)
; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
; O2-NEXT: addiu $sp, $sp, -8
; O2-NEXT: addu $1, $2, $25
; O2-NEXT: sw $5, 4($sp)
; O2-NEXT: lw $1, %got(x)($1)
; O2-NEXT: $BB7_1: # %entry
; O2-NEXT: # =>This Inner Loop Header: Depth=1
; O2-NEXT: ll $2, 0($1)
; O2-NEXT: bne $2, $4, $BB7_3
; O2-NEXT: nop
; O2-NEXT: # %bb.2: # %entry
; O2-NEXT: # in Loop: Header=BB7_1 Depth=1
; O2-NEXT: move $3, $5
; O2-NEXT: sc $3, 0($1)
; O2-NEXT: beqz $3, $BB7_1
; O2-NEXT: nop
; O2-NEXT: $BB7_3: # %entry
; O2-NEXT: jr $ra
; O2-NEXT: addiu $sp, $sp, 8
;
; O3-LABEL: AtomicCmpSwap32:
; O3: # %bb.0: # %entry
; O3-NEXT: lui $2, %hi(_gp_disp)
; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
; O3-NEXT: addiu $sp, $sp, -8
; O3-NEXT: addu $1, $2, $25
; O3-NEXT: sw $5, 4($sp)
; O3-NEXT: lw $1, %got(x)($1)
; O3-NEXT: $BB7_1: # %entry
; O3-NEXT: # =>This Inner Loop Header: Depth=1
; O3-NEXT: ll $2, 0($1)
; O3-NEXT: bne $2, $4, $BB7_3
; O3-NEXT: nop
; O3-NEXT: # %bb.2: # %entry
; O3-NEXT: # in Loop: Header=BB7_1 Depth=1
; O3-NEXT: move $3, $5
; O3-NEXT: sc $3, 0($1)
; O3-NEXT: beqz $3, $BB7_1
; O3-NEXT: nop
; O3-NEXT: $BB7_3: # %entry
; O3-NEXT: jr $ra
; O3-NEXT: addiu $sp, $sp, 8
;
; MIPS32EB-LABEL: AtomicCmpSwap32:
; MIPS32EB: # %bb.0: # %entry
; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32EB-NEXT: addiu $sp, $sp, -8
; MIPS32EB-NEXT: addu $1, $2, $25
; MIPS32EB-NEXT: sw $5, 4($sp)
; MIPS32EB-NEXT: lw $1, %got(x)($1)
; MIPS32EB-NEXT: $BB7_1: # %entry
; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32EB-NEXT: ll $2, 0($1)
; MIPS32EB-NEXT: bne $2, $4, $BB7_3
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: # %bb.2: # %entry
; MIPS32EB-NEXT: # in Loop: Header=BB7_1 Depth=1
; MIPS32EB-NEXT: move $3, $5
; MIPS32EB-NEXT: sc $3, 0($1)
; MIPS32EB-NEXT: beqz $3, $BB7_1
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: $BB7_3: # %entry
; MIPS32EB-NEXT: jr $ra
; MIPS32EB-NEXT: addiu $sp, $sp, 8
entry:
%newval.addr = alloca i32, align 4
store i32 %newval, i32* %newval.addr, align 4
%tmp = load i32, i32* %newval.addr, align 4
%0 = cmpxchg i32* @x, i32 %oldval, i32 %tmp monotonic monotonic
%1 = extractvalue { i32, i1 } %0, 0
ret i32 %1
}
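; ll/sc operate only on aligned words, so the i8 atomics below are emulated on
; the containing 32-bit word: the pointer is rounded down (and with -4), the
; byte's position becomes a shift amount (andi 3; sll 3 -- the big-endian
; mips64 -O0 run first flips the index with xori 3), and a shifted 0xff mask
; plus its nor-complement splice the updated byte into the word. A sketch of
; the loop body, distilled from the check lines below:
;   $BBn_1:
;     ll   $7, 0($2)    # load the whole containing word
;     addu $8, $7, $4   # add the shifted increment
;     and  $8, $8, $5   # keep only the target byte lane
;     and  $9, $7, $6   # keep the other three bytes unchanged
;     or   $9, $9, $8   # splice the lanes back together
;     sc   $9, 0($2)
;     beqz $9, $BBn_1
; The result is extracted with srlv and sign-extended (sll/sra 24, or seb on
; r2 and later).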
@y = common global i8 0, align 1
define signext i8 @AtomicLoadAdd8(i8 signext %incr) nounwind {
; MIPS32-LABEL: AtomicLoadAdd8:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-NEXT: addu $1, $2, $25
; MIPS32-NEXT: lw $1, %got(y)($1)
; MIPS32-NEXT: addiu $2, $zero, -4
; MIPS32-NEXT: and $2, $1, $2
; MIPS32-NEXT: andi $1, $1, 3
; MIPS32-NEXT: sll $3, $1, 3
; MIPS32-NEXT: ori $1, $zero, 255
; MIPS32-NEXT: sllv $5, $1, $3
; MIPS32-NEXT: nor $6, $zero, $5
; MIPS32-NEXT: sllv $4, $4, $3
; MIPS32-NEXT: $BB8_1: # %entry
; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32-NEXT: ll $7, 0($2)
; MIPS32-NEXT: addu $8, $7, $4
; MIPS32-NEXT: and $8, $8, $5
; MIPS32-NEXT: and $9, $7, $6
; MIPS32-NEXT: or $9, $9, $8
; MIPS32-NEXT: sc $9, 0($2)
; MIPS32-NEXT: beqz $9, $BB8_1
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %entry
; MIPS32-NEXT: and $1, $7, $5
; MIPS32-NEXT: srlv $1, $1, $3
; MIPS32-NEXT: sll $1, $1, 24
; MIPS32-NEXT: sra $1, $1, 24
; MIPS32-NEXT: # %bb.3: # %entry
; MIPS32-NEXT: sll $1, $1, 24
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: sra $2, $1, 24
;
; MIPS32O0-LABEL: AtomicLoadAdd8:
; MIPS32O0: # %bb.0: # %entry
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addiu $sp, $sp, -8
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: lw $1, %got(y)($1)
; MIPS32O0-NEXT: addiu $2, $zero, -4
; MIPS32O0-NEXT: and $2, $1, $2
; MIPS32O0-NEXT: andi $1, $1, 3
; MIPS32O0-NEXT: sll $1, $1, 3
; MIPS32O0-NEXT: ori $3, $zero, 255
; MIPS32O0-NEXT: sllv $3, $3, $1
; MIPS32O0-NEXT: nor $5, $zero, $3
; MIPS32O0-NEXT: sllv $4, $4, $1
; MIPS32O0-NEXT: $BB8_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32O0-NEXT: ll $7, 0($2)
; MIPS32O0-NEXT: addu $8, $7, $4
; MIPS32O0-NEXT: and $8, $8, $3
; MIPS32O0-NEXT: and $9, $7, $5
; MIPS32O0-NEXT: or $9, $9, $8
; MIPS32O0-NEXT: sc $9, 0($2)
; MIPS32O0-NEXT: beqz $9, $BB8_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: and $6, $7, $3
; MIPS32O0-NEXT: srlv $6, $6, $1
; MIPS32O0-NEXT: sll $6, $6, 24
; MIPS32O0-NEXT: sra $6, $6, 24
; MIPS32O0-NEXT: # %bb.3: # %entry
; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: # %bb.4: # %entry
; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32O0-NEXT: sll $2, $1, 24
; MIPS32O0-NEXT: sra $2, $2, 24
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
; MIPS32R2-LABEL: AtomicLoadAdd8:
; MIPS32R2: # %bb.0: # %entry
; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R2-NEXT: addu $1, $2, $25
; MIPS32R2-NEXT: lw $1, %got(y)($1)
; MIPS32R2-NEXT: addiu $2, $zero, -4
; MIPS32R2-NEXT: and $2, $1, $2
; MIPS32R2-NEXT: andi $1, $1, 3
; MIPS32R2-NEXT: sll $3, $1, 3
; MIPS32R2-NEXT: ori $1, $zero, 255
; MIPS32R2-NEXT: sllv $5, $1, $3
; MIPS32R2-NEXT: nor $6, $zero, $5
; MIPS32R2-NEXT: sllv $4, $4, $3
; MIPS32R2-NEXT: $BB8_1: # %entry
; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R2-NEXT: ll $7, 0($2)
; MIPS32R2-NEXT: addu $8, $7, $4
; MIPS32R2-NEXT: and $8, $8, $5
; MIPS32R2-NEXT: and $9, $7, $6
; MIPS32R2-NEXT: or $9, $9, $8
; MIPS32R2-NEXT: sc $9, 0($2)
; MIPS32R2-NEXT: beqz $9, $BB8_1
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: # %bb.2: # %entry
; MIPS32R2-NEXT: and $1, $7, $5
; MIPS32R2-NEXT: srlv $1, $1, $3
; MIPS32R2-NEXT: seb $1, $1
; MIPS32R2-NEXT: # %bb.3: # %entry
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: seb $2, $1
;
; MIPS32R6-LABEL: AtomicLoadAdd8:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-NEXT: addu $1, $2, $25
; MIPS32R6-NEXT: lw $1, %got(y)($1)
; MIPS32R6-NEXT: addiu $2, $zero, -4
; MIPS32R6-NEXT: and $2, $1, $2
; MIPS32R6-NEXT: andi $1, $1, 3
; MIPS32R6-NEXT: sll $3, $1, 3
; MIPS32R6-NEXT: ori $1, $zero, 255
; MIPS32R6-NEXT: sllv $5, $1, $3
; MIPS32R6-NEXT: nor $6, $zero, $5
; MIPS32R6-NEXT: sllv $4, $4, $3
; MIPS32R6-NEXT: $BB8_1: # %entry
; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6-NEXT: ll $7, 0($2)
; MIPS32R6-NEXT: addu $8, $7, $4
; MIPS32R6-NEXT: and $8, $8, $5
; MIPS32R6-NEXT: and $9, $7, $6
; MIPS32R6-NEXT: or $9, $9, $8
; MIPS32R6-NEXT: sc $9, 0($2)
; MIPS32R6-NEXT: beqzc $9, $BB8_1
; MIPS32R6-NEXT: # %bb.2: # %entry
; MIPS32R6-NEXT: and $1, $7, $5
; MIPS32R6-NEXT: srlv $1, $1, $3
; MIPS32R6-NEXT: seb $1, $1
; MIPS32R6-NEXT: # %bb.3: # %entry
; MIPS32R6-NEXT: jr $ra
; MIPS32R6-NEXT: seb $2, $1
;
; MIPS32R6O0-LABEL: AtomicLoadAdd8:
; MIPS32R6O0: # %bb.0: # %entry
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: move $2, $4
; MIPS32R6O0-NEXT: lw $1, %got(y)($1)
; MIPS32R6O0-NEXT: addiu $3, $zero, -4
; MIPS32R6O0-NEXT: and $3, $1, $3
; MIPS32R6O0-NEXT: andi $1, $1, 3
; MIPS32R6O0-NEXT: sll $1, $1, 3
; MIPS32R6O0-NEXT: ori $5, $zero, 255
; MIPS32R6O0-NEXT: sllv $5, $5, $1
; MIPS32R6O0-NEXT: nor $6, $zero, $5
; MIPS32R6O0-NEXT: sllv $4, $4, $1
; MIPS32R6O0-NEXT: $BB8_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6O0-NEXT: ll $8, 0($3)
; MIPS32R6O0-NEXT: addu $9, $8, $4
; MIPS32R6O0-NEXT: and $9, $9, $5
; MIPS32R6O0-NEXT: and $10, $8, $6
; MIPS32R6O0-NEXT: or $10, $10, $9
; MIPS32R6O0-NEXT: sc $10, 0($3)
; MIPS32R6O0-NEXT: beqzc $10, $BB8_1
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: and $7, $8, $5
; MIPS32R6O0-NEXT: srlv $7, $7, $1
; MIPS32R6O0-NEXT: seb $7, $7
; MIPS32R6O0-NEXT: # %bb.3: # %entry
; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.4: # %entry
; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: seb $2, $1
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: AtomicLoadAdd8:
; MIPS4: # %bb.0: # %entry
; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8)))
; MIPS4-NEXT: daddu $1, $1, $25
; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8)))
; MIPS4-NEXT: ld $1, %got_disp(y)($1)
; MIPS4-NEXT: daddiu $2, $zero, -4
; MIPS4-NEXT: and $2, $1, $2
; MIPS4-NEXT: andi $1, $1, 3
; MIPS4-NEXT: sll $3, $1, 3
; MIPS4-NEXT: ori $1, $zero, 255
; MIPS4-NEXT: sllv $5, $1, $3
; MIPS4-NEXT: nor $6, $zero, $5
; MIPS4-NEXT: sllv $4, $4, $3
; MIPS4-NEXT: .LBB8_1: # %entry
; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS4-NEXT: ll $7, 0($2)
; MIPS4-NEXT: addu $8, $7, $4
; MIPS4-NEXT: and $8, $8, $5
; MIPS4-NEXT: and $9, $7, $6
; MIPS4-NEXT: or $9, $9, $8
; MIPS4-NEXT: sc $9, 0($2)
; MIPS4-NEXT: beqz $9, .LBB8_1
; MIPS4-NEXT: nop
; MIPS4-NEXT: # %bb.2: # %entry
; MIPS4-NEXT: and $1, $7, $5
; MIPS4-NEXT: srlv $1, $1, $3
; MIPS4-NEXT: sll $1, $1, 24
; MIPS4-NEXT: sra $1, $1, 24
; MIPS4-NEXT: # %bb.3: # %entry
; MIPS4-NEXT: sll $1, $1, 24
; MIPS4-NEXT: jr $ra
; MIPS4-NEXT: sra $2, $1, 24
;
; MIPS64-LABEL: AtomicLoadAdd8:
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8)))
; MIPS64-NEXT: daddu $1, $1, $25
; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8)))
; MIPS64-NEXT: ld $1, %got_disp(y)($1)
; MIPS64-NEXT: daddiu $2, $zero, -4
; MIPS64-NEXT: and $2, $1, $2
; MIPS64-NEXT: andi $1, $1, 3
; MIPS64-NEXT: sll $3, $1, 3
; MIPS64-NEXT: ori $1, $zero, 255
; MIPS64-NEXT: sllv $5, $1, $3
; MIPS64-NEXT: nor $6, $zero, $5
; MIPS64-NEXT: sllv $4, $4, $3
; MIPS64-NEXT: .LBB8_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64-NEXT: ll $7, 0($2)
; MIPS64-NEXT: addu $8, $7, $4
; MIPS64-NEXT: and $8, $8, $5
; MIPS64-NEXT: and $9, $7, $6
; MIPS64-NEXT: or $9, $9, $8
; MIPS64-NEXT: sc $9, 0($2)
; MIPS64-NEXT: beqz $9, .LBB8_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
; MIPS64-NEXT: and $1, $7, $5
; MIPS64-NEXT: srlv $1, $1, $3
; MIPS64-NEXT: sll $1, $1, 24
; MIPS64-NEXT: sra $1, $1, 24
; MIPS64-NEXT: # %bb.3: # %entry
; MIPS64-NEXT: sll $1, $1, 24
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: sra $2, $1, 24
;
; MIPS64R2-LABEL: AtomicLoadAdd8:
; MIPS64R2: # %bb.0: # %entry
; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8)))
; MIPS64R2-NEXT: daddu $1, $1, $25
; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8)))
; MIPS64R2-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R2-NEXT: daddiu $2, $zero, -4
; MIPS64R2-NEXT: and $2, $1, $2
; MIPS64R2-NEXT: andi $1, $1, 3
; MIPS64R2-NEXT: sll $3, $1, 3
; MIPS64R2-NEXT: ori $1, $zero, 255
; MIPS64R2-NEXT: sllv $5, $1, $3
; MIPS64R2-NEXT: nor $6, $zero, $5
; MIPS64R2-NEXT: sllv $4, $4, $3
; MIPS64R2-NEXT: .LBB8_1: # %entry
; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R2-NEXT: ll $7, 0($2)
; MIPS64R2-NEXT: addu $8, $7, $4
; MIPS64R2-NEXT: and $8, $8, $5
; MIPS64R2-NEXT: and $9, $7, $6
; MIPS64R2-NEXT: or $9, $9, $8
; MIPS64R2-NEXT: sc $9, 0($2)
; MIPS64R2-NEXT: beqz $9, .LBB8_1
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: # %bb.2: # %entry
; MIPS64R2-NEXT: and $1, $7, $5
; MIPS64R2-NEXT: srlv $1, $1, $3
; MIPS64R2-NEXT: seb $1, $1
; MIPS64R2-NEXT: # %bb.3: # %entry
; MIPS64R2-NEXT: jr $ra
; MIPS64R2-NEXT: seb $2, $1
;
; MIPS64R6-LABEL: AtomicLoadAdd8:
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8)))
; MIPS64R6-NEXT: daddu $1, $1, $25
; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8)))
; MIPS64R6-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R6-NEXT: daddiu $2, $zero, -4
; MIPS64R6-NEXT: and $2, $1, $2
; MIPS64R6-NEXT: andi $1, $1, 3
; MIPS64R6-NEXT: sll $3, $1, 3
; MIPS64R6-NEXT: ori $1, $zero, 255
; MIPS64R6-NEXT: sllv $5, $1, $3
; MIPS64R6-NEXT: nor $6, $zero, $5
; MIPS64R6-NEXT: sllv $4, $4, $3
; MIPS64R6-NEXT: .LBB8_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6-NEXT: ll $7, 0($2)
; MIPS64R6-NEXT: addu $8, $7, $4
; MIPS64R6-NEXT: and $8, $8, $5
; MIPS64R6-NEXT: and $9, $7, $6
; MIPS64R6-NEXT: or $9, $9, $8
; MIPS64R6-NEXT: sc $9, 0($2)
; MIPS64R6-NEXT: beqzc $9, .LBB8_1
; MIPS64R6-NEXT: # %bb.2: # %entry
; MIPS64R6-NEXT: and $1, $7, $5
; MIPS64R6-NEXT: srlv $1, $1, $3
; MIPS64R6-NEXT: seb $1, $1
; MIPS64R6-NEXT: # %bb.3: # %entry
; MIPS64R6-NEXT: jr $ra
; MIPS64R6-NEXT: seb $2, $1
;
; MIPS64R6O0-LABEL: AtomicLoadAdd8:
; MIPS64R6O0: # %bb.0: # %entry
; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
; MIPS64R6O0-NEXT: and $2, $1, $2
; MIPS64R6O0-NEXT: andi $3, $1, 3
; MIPS64R6O0-NEXT: xori $3, $3, 3
; MIPS64R6O0-NEXT: sll $3, $3, 3
; MIPS64R6O0-NEXT: ori $5, $zero, 255
; MIPS64R6O0-NEXT: sllv $5, $5, $3
; MIPS64R6O0-NEXT: nor $6, $zero, $5
; MIPS64R6O0-NEXT: sllv $4, $4, $3
; MIPS64R6O0-NEXT: .LBB8_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6O0-NEXT: ll $8, 0($2)
; MIPS64R6O0-NEXT: addu $9, $8, $4
; MIPS64R6O0-NEXT: and $9, $9, $5
; MIPS64R6O0-NEXT: and $10, $8, $6
; MIPS64R6O0-NEXT: or $10, $10, $9
; MIPS64R6O0-NEXT: sc $10, 0($2)
; MIPS64R6O0-NEXT: beqzc $10, .LBB8_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: and $7, $8, $5
; MIPS64R6O0-NEXT: srlv $7, $7, $3
; MIPS64R6O0-NEXT: seb $7, $7
; MIPS64R6O0-NEXT: # %bb.3: # %entry
; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seb $2, $1
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: AtomicLoadAdd8:
; MM32: # %bb.0: # %entry
; MM32-NEXT: lui $2, %hi(_gp_disp)
; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MM32-NEXT: addu $2, $2, $25
; MM32-NEXT: lw $1, %got(y)($2)
; MM32-NEXT: addiu $2, $zero, -4
; MM32-NEXT: and $2, $1, $2
; MM32-NEXT: andi $1, $1, 3
; MM32-NEXT: sll $3, $1, 3
; MM32-NEXT: ori $1, $zero, 255
; MM32-NEXT: sllv $5, $1, $3
; MM32-NEXT: nor $6, $zero, $5
; MM32-NEXT: sllv $4, $4, $3
; MM32-NEXT: $BB8_1: # %entry
; MM32-NEXT: # =>This Inner Loop Header: Depth=1
; MM32-NEXT: ll $7, 0($2)
; MM32-NEXT: addu $8, $7, $4
; MM32-NEXT: and $8, $8, $5
; MM32-NEXT: and $9, $7, $6
; MM32-NEXT: or $9, $9, $8
; MM32-NEXT: sc $9, 0($2)
; MM32-NEXT: beqzc $9, $BB8_1
; MM32-NEXT: # %bb.2: # %entry
; MM32-NEXT: and $1, $7, $5
; MM32-NEXT: srlv $1, $1, $3
; MM32-NEXT: seb $1, $1
; MM32-NEXT: # %bb.3: # %entry
; MM32-NEXT: jr $ra
; MM32-NEXT: seb $2, $1
;
; O1-LABEL: AtomicLoadAdd8:
; O1: # %bb.0: # %entry
; O1-NEXT: lui $2, %hi(_gp_disp)
; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
; O1-NEXT: addu $1, $2, $25
; O1-NEXT: lw $1, %got(y)($1)
; O1-NEXT: addiu $2, $zero, -4
; O1-NEXT: and $2, $1, $2
; O1-NEXT: andi $1, $1, 3
; O1-NEXT: sll $3, $1, 3
; O1-NEXT: ori $1, $zero, 255
; O1-NEXT: sllv $5, $1, $3
; O1-NEXT: nor $6, $zero, $5
; O1-NEXT: sllv $4, $4, $3
; O1-NEXT: $BB8_1: # %entry
; O1-NEXT: # =>This Inner Loop Header: Depth=1
; O1-NEXT: ll $7, 0($2)
; O1-NEXT: addu $8, $7, $4
; O1-NEXT: and $8, $8, $5
; O1-NEXT: and $9, $7, $6
; O1-NEXT: or $9, $9, $8
; O1-NEXT: sc $9, 0($2)
; O1-NEXT: beqz $9, $BB8_1
; O1-NEXT: nop
; O1-NEXT: # %bb.2: # %entry
; O1-NEXT: and $1, $7, $5
; O1-NEXT: srlv $1, $1, $3
; O1-NEXT: sll $1, $1, 24
; O1-NEXT: sra $1, $1, 24
; O1-NEXT: # %bb.3: # %entry
; O1-NEXT: sll $1, $1, 24
; O1-NEXT: jr $ra
; O1-NEXT: sra $2, $1, 24
;
; O2-LABEL: AtomicLoadAdd8:
; O2: # %bb.0: # %entry
; O2-NEXT: lui $2, %hi(_gp_disp)
; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
; O2-NEXT: addu $1, $2, $25
; O2-NEXT: lw $1, %got(y)($1)
; O2-NEXT: addiu $2, $zero, -4
; O2-NEXT: and $2, $1, $2
; O2-NEXT: andi $1, $1, 3
; O2-NEXT: sll $3, $1, 3
; O2-NEXT: ori $1, $zero, 255
; O2-NEXT: sllv $5, $1, $3
; O2-NEXT: nor $6, $zero, $5
; O2-NEXT: sllv $4, $4, $3
; O2-NEXT: $BB8_1: # %entry
; O2-NEXT: # =>This Inner Loop Header: Depth=1
; O2-NEXT: ll $7, 0($2)
; O2-NEXT: addu $8, $7, $4
; O2-NEXT: and $8, $8, $5
; O2-NEXT: and $9, $7, $6
; O2-NEXT: or $9, $9, $8
; O2-NEXT: sc $9, 0($2)
; O2-NEXT: beqz $9, $BB8_1
; O2-NEXT: nop
; O2-NEXT: # %bb.2: # %entry
; O2-NEXT: and $1, $7, $5
; O2-NEXT: srlv $1, $1, $3
; O2-NEXT: sll $1, $1, 24
; O2-NEXT: sra $1, $1, 24
; O2-NEXT: # %bb.3: # %entry
; O2-NEXT: sll $1, $1, 24
; O2-NEXT: jr $ra
; O2-NEXT: sra $2, $1, 24
;
; O3-LABEL: AtomicLoadAdd8:
; O3: # %bb.0: # %entry
; O3-NEXT: lui $2, %hi(_gp_disp)
; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
; O3-NEXT: addu $1, $2, $25
; O3-NEXT: addiu $2, $zero, -4
; O3-NEXT: lw $1, %got(y)($1)
; O3-NEXT: and $2, $1, $2
; O3-NEXT: andi $1, $1, 3
; O3-NEXT: sll $3, $1, 3
; O3-NEXT: ori $1, $zero, 255
; O3-NEXT: sllv $5, $1, $3
; O3-NEXT: sllv $4, $4, $3
; O3-NEXT: nor $6, $zero, $5
; O3-NEXT: $BB8_1: # %entry
; O3-NEXT: # =>This Inner Loop Header: Depth=1
; O3-NEXT: ll $7, 0($2)
; O3-NEXT: addu $8, $7, $4
; O3-NEXT: and $8, $8, $5
; O3-NEXT: and $9, $7, $6
; O3-NEXT: or $9, $9, $8
; O3-NEXT: sc $9, 0($2)
; O3-NEXT: beqz $9, $BB8_1
; O3-NEXT: nop
; O3-NEXT: # %bb.2: # %entry
; O3-NEXT: and $1, $7, $5
; O3-NEXT: srlv $1, $1, $3
; O3-NEXT: sll $1, $1, 24
; O3-NEXT: sra $1, $1, 24
; O3-NEXT: # %bb.3: # %entry
; O3-NEXT: sll $1, $1, 24
; O3-NEXT: jr $ra
; O3-NEXT: sra $2, $1, 24
;
; MIPS32EB-LABEL: AtomicLoadAdd8:
; MIPS32EB: # %bb.0: # %entry
; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32EB-NEXT: addu $1, $2, $25
; MIPS32EB-NEXT: lw $1, %got(y)($1)
; MIPS32EB-NEXT: addiu $2, $zero, -4
; MIPS32EB-NEXT: and $2, $1, $2
; MIPS32EB-NEXT: andi $1, $1, 3
; MIPS32EB-NEXT: xori $1, $1, 3
; MIPS32EB-NEXT: sll $3, $1, 3
; MIPS32EB-NEXT: ori $1, $zero, 255
; MIPS32EB-NEXT: sllv $5, $1, $3
; MIPS32EB-NEXT: nor $6, $zero, $5
; MIPS32EB-NEXT: sllv $4, $4, $3
; MIPS32EB-NEXT: $BB8_1: # %entry
; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32EB-NEXT: ll $7, 0($2)
; MIPS32EB-NEXT: addu $8, $7, $4
; MIPS32EB-NEXT: and $8, $8, $5
; MIPS32EB-NEXT: and $9, $7, $6
; MIPS32EB-NEXT: or $9, $9, $8
; MIPS32EB-NEXT: sc $9, 0($2)
; MIPS32EB-NEXT: beqz $9, $BB8_1
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: # %bb.2: # %entry
; MIPS32EB-NEXT: and $1, $7, $5
; MIPS32EB-NEXT: srlv $1, $1, $3
; MIPS32EB-NEXT: sll $1, $1, 24
; MIPS32EB-NEXT: sra $1, $1, 24
; MIPS32EB-NEXT: # %bb.3: # %entry
; MIPS32EB-NEXT: sll $1, $1, 24
; MIPS32EB-NEXT: jr $ra
; MIPS32EB-NEXT: sra $2, $1, 24
entry:
%0 = atomicrmw add i8* @y, i8 %incr monotonic
ret i8 %0
}
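; AtomicLoadSub8 exercises the same subword expansion as AtomicLoadAdd8 above:
; the i8 address is aligned down to its containing word (and with -4), the
; byte offset (xor'd with 3 first on big-endian targets) becomes a shift
; amount, and the ll/sc loop updates only the selected byte through a shifted
; 0xff mask, with subu replacing addu inside the loop. Pre-R2 cores
; sign-extend the result with sll/sra by 24; R2 and later use seb.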
define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind {
; MIPS32-LABEL: AtomicLoadSub8:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-NEXT: addu $1, $2, $25
; MIPS32-NEXT: lw $1, %got(y)($1)
; MIPS32-NEXT: addiu $2, $zero, -4
; MIPS32-NEXT: and $2, $1, $2
; MIPS32-NEXT: andi $1, $1, 3
; MIPS32-NEXT: sll $3, $1, 3
; MIPS32-NEXT: ori $1, $zero, 255
; MIPS32-NEXT: sllv $5, $1, $3
; MIPS32-NEXT: nor $6, $zero, $5
; MIPS32-NEXT: sllv $4, $4, $3
; MIPS32-NEXT: $BB9_1: # %entry
; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32-NEXT: ll $7, 0($2)
; MIPS32-NEXT: subu $8, $7, $4
; MIPS32-NEXT: and $8, $8, $5
; MIPS32-NEXT: and $9, $7, $6
; MIPS32-NEXT: or $9, $9, $8
; MIPS32-NEXT: sc $9, 0($2)
; MIPS32-NEXT: beqz $9, $BB9_1
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %entry
; MIPS32-NEXT: and $1, $7, $5
; MIPS32-NEXT: srlv $1, $1, $3
; MIPS32-NEXT: sll $1, $1, 24
; MIPS32-NEXT: sra $1, $1, 24
; MIPS32-NEXT: # %bb.3: # %entry
; MIPS32-NEXT: sll $1, $1, 24
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: sra $2, $1, 24
;
; MIPS32O0-LABEL: AtomicLoadSub8:
; MIPS32O0: # %bb.0: # %entry
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addiu $sp, $sp, -8
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: lw $1, %got(y)($1)
; MIPS32O0-NEXT: addiu $2, $zero, -4
; MIPS32O0-NEXT: and $2, $1, $2
; MIPS32O0-NEXT: andi $1, $1, 3
; MIPS32O0-NEXT: sll $1, $1, 3
; MIPS32O0-NEXT: ori $3, $zero, 255
; MIPS32O0-NEXT: sllv $3, $3, $1
; MIPS32O0-NEXT: nor $5, $zero, $3
; MIPS32O0-NEXT: sllv $4, $4, $1
; MIPS32O0-NEXT: $BB9_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32O0-NEXT: ll $7, 0($2)
; MIPS32O0-NEXT: subu $8, $7, $4
; MIPS32O0-NEXT: and $8, $8, $3
; MIPS32O0-NEXT: and $9, $7, $5
; MIPS32O0-NEXT: or $9, $9, $8
; MIPS32O0-NEXT: sc $9, 0($2)
; MIPS32O0-NEXT: beqz $9, $BB9_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: and $6, $7, $3
; MIPS32O0-NEXT: srlv $6, $6, $1
; MIPS32O0-NEXT: sll $6, $6, 24
; MIPS32O0-NEXT: sra $6, $6, 24
; MIPS32O0-NEXT: # %bb.3: # %entry
; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: # %bb.4: # %entry
; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32O0-NEXT: sll $2, $1, 24
; MIPS32O0-NEXT: sra $2, $2, 24
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
; MIPS32R2-LABEL: AtomicLoadSub8:
; MIPS32R2: # %bb.0: # %entry
; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R2-NEXT: addu $1, $2, $25
; MIPS32R2-NEXT: lw $1, %got(y)($1)
; MIPS32R2-NEXT: addiu $2, $zero, -4
; MIPS32R2-NEXT: and $2, $1, $2
; MIPS32R2-NEXT: andi $1, $1, 3
; MIPS32R2-NEXT: sll $3, $1, 3
; MIPS32R2-NEXT: ori $1, $zero, 255
; MIPS32R2-NEXT: sllv $5, $1, $3
; MIPS32R2-NEXT: nor $6, $zero, $5
; MIPS32R2-NEXT: sllv $4, $4, $3
; MIPS32R2-NEXT: $BB9_1: # %entry
; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R2-NEXT: ll $7, 0($2)
; MIPS32R2-NEXT: subu $8, $7, $4
; MIPS32R2-NEXT: and $8, $8, $5
; MIPS32R2-NEXT: and $9, $7, $6
; MIPS32R2-NEXT: or $9, $9, $8
; MIPS32R2-NEXT: sc $9, 0($2)
; MIPS32R2-NEXT: beqz $9, $BB9_1
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: # %bb.2: # %entry
; MIPS32R2-NEXT: and $1, $7, $5
; MIPS32R2-NEXT: srlv $1, $1, $3
; MIPS32R2-NEXT: seb $1, $1
; MIPS32R2-NEXT: # %bb.3: # %entry
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: seb $2, $1
;
; MIPS32R6-LABEL: AtomicLoadSub8:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-NEXT: addu $1, $2, $25
; MIPS32R6-NEXT: lw $1, %got(y)($1)
; MIPS32R6-NEXT: addiu $2, $zero, -4
; MIPS32R6-NEXT: and $2, $1, $2
; MIPS32R6-NEXT: andi $1, $1, 3
; MIPS32R6-NEXT: sll $3, $1, 3
; MIPS32R6-NEXT: ori $1, $zero, 255
; MIPS32R6-NEXT: sllv $5, $1, $3
; MIPS32R6-NEXT: nor $6, $zero, $5
; MIPS32R6-NEXT: sllv $4, $4, $3
; MIPS32R6-NEXT: $BB9_1: # %entry
; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6-NEXT: ll $7, 0($2)
; MIPS32R6-NEXT: subu $8, $7, $4
; MIPS32R6-NEXT: and $8, $8, $5
; MIPS32R6-NEXT: and $9, $7, $6
; MIPS32R6-NEXT: or $9, $9, $8
; MIPS32R6-NEXT: sc $9, 0($2)
; MIPS32R6-NEXT: beqzc $9, $BB9_1
; MIPS32R6-NEXT: # %bb.2: # %entry
; MIPS32R6-NEXT: and $1, $7, $5
; MIPS32R6-NEXT: srlv $1, $1, $3
; MIPS32R6-NEXT: seb $1, $1
; MIPS32R6-NEXT: # %bb.3: # %entry
; MIPS32R6-NEXT: jr $ra
; MIPS32R6-NEXT: seb $2, $1
;
; MIPS32R6O0-LABEL: AtomicLoadSub8:
; MIPS32R6O0: # %bb.0: # %entry
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: move $2, $4
; MIPS32R6O0-NEXT: lw $1, %got(y)($1)
; MIPS32R6O0-NEXT: addiu $3, $zero, -4
; MIPS32R6O0-NEXT: and $3, $1, $3
; MIPS32R6O0-NEXT: andi $1, $1, 3
; MIPS32R6O0-NEXT: sll $1, $1, 3
; MIPS32R6O0-NEXT: ori $5, $zero, 255
; MIPS32R6O0-NEXT: sllv $5, $5, $1
; MIPS32R6O0-NEXT: nor $6, $zero, $5
; MIPS32R6O0-NEXT: sllv $4, $4, $1
; MIPS32R6O0-NEXT: $BB9_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6O0-NEXT: ll $8, 0($3)
; MIPS32R6O0-NEXT: subu $9, $8, $4
; MIPS32R6O0-NEXT: and $9, $9, $5
; MIPS32R6O0-NEXT: and $10, $8, $6
; MIPS32R6O0-NEXT: or $10, $10, $9
; MIPS32R6O0-NEXT: sc $10, 0($3)
; MIPS32R6O0-NEXT: beqzc $10, $BB9_1
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: and $7, $8, $5
; MIPS32R6O0-NEXT: srlv $7, $7, $1
; MIPS32R6O0-NEXT: seb $7, $7
; MIPS32R6O0-NEXT: # %bb.3: # %entry
; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.4: # %entry
; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: seb $2, $1
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: AtomicLoadSub8:
; MIPS4: # %bb.0: # %entry
; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8)))
; MIPS4-NEXT: daddu $1, $1, $25
; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8)))
; MIPS4-NEXT: ld $1, %got_disp(y)($1)
; MIPS4-NEXT: daddiu $2, $zero, -4
; MIPS4-NEXT: and $2, $1, $2
; MIPS4-NEXT: andi $1, $1, 3
; MIPS4-NEXT: sll $3, $1, 3
; MIPS4-NEXT: ori $1, $zero, 255
; MIPS4-NEXT: sllv $5, $1, $3
; MIPS4-NEXT: nor $6, $zero, $5
; MIPS4-NEXT: sllv $4, $4, $3
; MIPS4-NEXT: .LBB9_1: # %entry
; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS4-NEXT: ll $7, 0($2)
; MIPS4-NEXT: subu $8, $7, $4
; MIPS4-NEXT: and $8, $8, $5
; MIPS4-NEXT: and $9, $7, $6
; MIPS4-NEXT: or $9, $9, $8
; MIPS4-NEXT: sc $9, 0($2)
; MIPS4-NEXT: beqz $9, .LBB9_1
; MIPS4-NEXT: nop
; MIPS4-NEXT: # %bb.2: # %entry
; MIPS4-NEXT: and $1, $7, $5
; MIPS4-NEXT: srlv $1, $1, $3
; MIPS4-NEXT: sll $1, $1, 24
; MIPS4-NEXT: sra $1, $1, 24
; MIPS4-NEXT: # %bb.3: # %entry
; MIPS4-NEXT: sll $1, $1, 24
; MIPS4-NEXT: jr $ra
; MIPS4-NEXT: sra $2, $1, 24
;
; MIPS64-LABEL: AtomicLoadSub8:
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8)))
; MIPS64-NEXT: daddu $1, $1, $25
; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8)))
; MIPS64-NEXT: ld $1, %got_disp(y)($1)
; MIPS64-NEXT: daddiu $2, $zero, -4
; MIPS64-NEXT: and $2, $1, $2
; MIPS64-NEXT: andi $1, $1, 3
; MIPS64-NEXT: sll $3, $1, 3
; MIPS64-NEXT: ori $1, $zero, 255
; MIPS64-NEXT: sllv $5, $1, $3
; MIPS64-NEXT: nor $6, $zero, $5
; MIPS64-NEXT: sllv $4, $4, $3
; MIPS64-NEXT: .LBB9_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64-NEXT: ll $7, 0($2)
; MIPS64-NEXT: subu $8, $7, $4
; MIPS64-NEXT: and $8, $8, $5
; MIPS64-NEXT: and $9, $7, $6
; MIPS64-NEXT: or $9, $9, $8
; MIPS64-NEXT: sc $9, 0($2)
; MIPS64-NEXT: beqz $9, .LBB9_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
; MIPS64-NEXT: and $1, $7, $5
; MIPS64-NEXT: srlv $1, $1, $3
; MIPS64-NEXT: sll $1, $1, 24
; MIPS64-NEXT: sra $1, $1, 24
; MIPS64-NEXT: # %bb.3: # %entry
; MIPS64-NEXT: sll $1, $1, 24
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: sra $2, $1, 24
;
; MIPS64R2-LABEL: AtomicLoadSub8:
; MIPS64R2: # %bb.0: # %entry
; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8)))
; MIPS64R2-NEXT: daddu $1, $1, $25
; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8)))
; MIPS64R2-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R2-NEXT: daddiu $2, $zero, -4
; MIPS64R2-NEXT: and $2, $1, $2
; MIPS64R2-NEXT: andi $1, $1, 3
; MIPS64R2-NEXT: sll $3, $1, 3
; MIPS64R2-NEXT: ori $1, $zero, 255
; MIPS64R2-NEXT: sllv $5, $1, $3
; MIPS64R2-NEXT: nor $6, $zero, $5
; MIPS64R2-NEXT: sllv $4, $4, $3
; MIPS64R2-NEXT: .LBB9_1: # %entry
; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R2-NEXT: ll $7, 0($2)
; MIPS64R2-NEXT: subu $8, $7, $4
; MIPS64R2-NEXT: and $8, $8, $5
; MIPS64R2-NEXT: and $9, $7, $6
; MIPS64R2-NEXT: or $9, $9, $8
; MIPS64R2-NEXT: sc $9, 0($2)
; MIPS64R2-NEXT: beqz $9, .LBB9_1
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: # %bb.2: # %entry
; MIPS64R2-NEXT: and $1, $7, $5
; MIPS64R2-NEXT: srlv $1, $1, $3
; MIPS64R2-NEXT: seb $1, $1
; MIPS64R2-NEXT: # %bb.3: # %entry
; MIPS64R2-NEXT: jr $ra
; MIPS64R2-NEXT: seb $2, $1
;
; MIPS64R6-LABEL: AtomicLoadSub8:
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8)))
; MIPS64R6-NEXT: daddu $1, $1, $25
; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8)))
; MIPS64R6-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R6-NEXT: daddiu $2, $zero, -4
; MIPS64R6-NEXT: and $2, $1, $2
; MIPS64R6-NEXT: andi $1, $1, 3
; MIPS64R6-NEXT: sll $3, $1, 3
; MIPS64R6-NEXT: ori $1, $zero, 255
; MIPS64R6-NEXT: sllv $5, $1, $3
; MIPS64R6-NEXT: nor $6, $zero, $5
; MIPS64R6-NEXT: sllv $4, $4, $3
; MIPS64R6-NEXT: .LBB9_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6-NEXT: ll $7, 0($2)
; MIPS64R6-NEXT: subu $8, $7, $4
; MIPS64R6-NEXT: and $8, $8, $5
; MIPS64R6-NEXT: and $9, $7, $6
; MIPS64R6-NEXT: or $9, $9, $8
; MIPS64R6-NEXT: sc $9, 0($2)
; MIPS64R6-NEXT: beqzc $9, .LBB9_1
; MIPS64R6-NEXT: # %bb.2: # %entry
; MIPS64R6-NEXT: and $1, $7, $5
; MIPS64R6-NEXT: srlv $1, $1, $3
; MIPS64R6-NEXT: seb $1, $1
; MIPS64R6-NEXT: # %bb.3: # %entry
; MIPS64R6-NEXT: jr $ra
; MIPS64R6-NEXT: seb $2, $1
;
; MIPS64R6O0-LABEL: AtomicLoadSub8:
; MIPS64R6O0: # %bb.0: # %entry
; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
; MIPS64R6O0-NEXT: and $2, $1, $2
; MIPS64R6O0-NEXT: andi $3, $1, 3
; MIPS64R6O0-NEXT: xori $3, $3, 3
; MIPS64R6O0-NEXT: sll $3, $3, 3
; MIPS64R6O0-NEXT: ori $5, $zero, 255
; MIPS64R6O0-NEXT: sllv $5, $5, $3
; MIPS64R6O0-NEXT: nor $6, $zero, $5
; MIPS64R6O0-NEXT: sllv $4, $4, $3
; MIPS64R6O0-NEXT: .LBB9_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6O0-NEXT: ll $8, 0($2)
; MIPS64R6O0-NEXT: subu $9, $8, $4
; MIPS64R6O0-NEXT: and $9, $9, $5
; MIPS64R6O0-NEXT: and $10, $8, $6
; MIPS64R6O0-NEXT: or $10, $10, $9
; MIPS64R6O0-NEXT: sc $10, 0($2)
; MIPS64R6O0-NEXT: beqzc $10, .LBB9_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: and $7, $8, $5
; MIPS64R6O0-NEXT: srlv $7, $7, $3
; MIPS64R6O0-NEXT: seb $7, $7
; MIPS64R6O0-NEXT: # %bb.3: # %entry
; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seb $2, $1
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: AtomicLoadSub8:
; MM32: # %bb.0: # %entry
; MM32-NEXT: lui $2, %hi(_gp_disp)
; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MM32-NEXT: addu $2, $2, $25
; MM32-NEXT: lw $1, %got(y)($2)
; MM32-NEXT: addiu $2, $zero, -4
; MM32-NEXT: and $2, $1, $2
; MM32-NEXT: andi $1, $1, 3
; MM32-NEXT: sll $3, $1, 3
; MM32-NEXT: ori $1, $zero, 255
; MM32-NEXT: sllv $5, $1, $3
; MM32-NEXT: nor $6, $zero, $5
; MM32-NEXT: sllv $4, $4, $3
; MM32-NEXT: $BB9_1: # %entry
; MM32-NEXT: # =>This Inner Loop Header: Depth=1
; MM32-NEXT: ll $7, 0($2)
; MM32-NEXT: subu $8, $7, $4
; MM32-NEXT: and $8, $8, $5
; MM32-NEXT: and $9, $7, $6
; MM32-NEXT: or $9, $9, $8
; MM32-NEXT: sc $9, 0($2)
; MM32-NEXT: beqzc $9, $BB9_1
; MM32-NEXT: # %bb.2: # %entry
; MM32-NEXT: and $1, $7, $5
; MM32-NEXT: srlv $1, $1, $3
; MM32-NEXT: seb $1, $1
; MM32-NEXT: # %bb.3: # %entry
; MM32-NEXT: jr $ra
; MM32-NEXT: seb $2, $1
;
; O1-LABEL: AtomicLoadSub8:
; O1: # %bb.0: # %entry
; O1-NEXT: lui $2, %hi(_gp_disp)
; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
; O1-NEXT: addu $1, $2, $25
; O1-NEXT: lw $1, %got(y)($1)
; O1-NEXT: addiu $2, $zero, -4
; O1-NEXT: and $2, $1, $2
; O1-NEXT: andi $1, $1, 3
; O1-NEXT: sll $3, $1, 3
; O1-NEXT: ori $1, $zero, 255
; O1-NEXT: sllv $5, $1, $3
; O1-NEXT: nor $6, $zero, $5
; O1-NEXT: sllv $4, $4, $3
; O1-NEXT: $BB9_1: # %entry
; O1-NEXT: # =>This Inner Loop Header: Depth=1
; O1-NEXT: ll $7, 0($2)
; O1-NEXT: subu $8, $7, $4
; O1-NEXT: and $8, $8, $5
; O1-NEXT: and $9, $7, $6
; O1-NEXT: or $9, $9, $8
; O1-NEXT: sc $9, 0($2)
; O1-NEXT: beqz $9, $BB9_1
; O1-NEXT: nop
; O1-NEXT: # %bb.2: # %entry
; O1-NEXT: and $1, $7, $5
; O1-NEXT: srlv $1, $1, $3
; O1-NEXT: sll $1, $1, 24
; O1-NEXT: sra $1, $1, 24
; O1-NEXT: # %bb.3: # %entry
; O1-NEXT: sll $1, $1, 24
; O1-NEXT: jr $ra
; O1-NEXT: sra $2, $1, 24
;
; O2-LABEL: AtomicLoadSub8:
; O2: # %bb.0: # %entry
; O2-NEXT: lui $2, %hi(_gp_disp)
; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
; O2-NEXT: addu $1, $2, $25
; O2-NEXT: lw $1, %got(y)($1)
; O2-NEXT: addiu $2, $zero, -4
; O2-NEXT: and $2, $1, $2
; O2-NEXT: andi $1, $1, 3
; O2-NEXT: sll $3, $1, 3
; O2-NEXT: ori $1, $zero, 255
; O2-NEXT: sllv $5, $1, $3
; O2-NEXT: nor $6, $zero, $5
; O2-NEXT: sllv $4, $4, $3
; O2-NEXT: $BB9_1: # %entry
; O2-NEXT: # =>This Inner Loop Header: Depth=1
; O2-NEXT: ll $7, 0($2)
; O2-NEXT: subu $8, $7, $4
; O2-NEXT: and $8, $8, $5
; O2-NEXT: and $9, $7, $6
; O2-NEXT: or $9, $9, $8
; O2-NEXT: sc $9, 0($2)
; O2-NEXT: beqz $9, $BB9_1
; O2-NEXT: nop
; O2-NEXT: # %bb.2: # %entry
; O2-NEXT: and $1, $7, $5
; O2-NEXT: srlv $1, $1, $3
; O2-NEXT: sll $1, $1, 24
; O2-NEXT: sra $1, $1, 24
; O2-NEXT: # %bb.3: # %entry
; O2-NEXT: sll $1, $1, 24
; O2-NEXT: jr $ra
; O2-NEXT: sra $2, $1, 24
;
; O3-LABEL: AtomicLoadSub8:
; O3: # %bb.0: # %entry
; O3-NEXT: lui $2, %hi(_gp_disp)
; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
; O3-NEXT: addu $1, $2, $25
; O3-NEXT: addiu $2, $zero, -4
; O3-NEXT: lw $1, %got(y)($1)
; O3-NEXT: and $2, $1, $2
; O3-NEXT: andi $1, $1, 3
; O3-NEXT: sll $3, $1, 3
; O3-NEXT: ori $1, $zero, 255
; O3-NEXT: sllv $5, $1, $3
; O3-NEXT: sllv $4, $4, $3
; O3-NEXT: nor $6, $zero, $5
; O3-NEXT: $BB9_1: # %entry
; O3-NEXT: # =>This Inner Loop Header: Depth=1
; O3-NEXT: ll $7, 0($2)
; O3-NEXT: subu $8, $7, $4
; O3-NEXT: and $8, $8, $5
; O3-NEXT: and $9, $7, $6
; O3-NEXT: or $9, $9, $8
; O3-NEXT: sc $9, 0($2)
; O3-NEXT: beqz $9, $BB9_1
; O3-NEXT: nop
; O3-NEXT: # %bb.2: # %entry
; O3-NEXT: and $1, $7, $5
; O3-NEXT: srlv $1, $1, $3
; O3-NEXT: sll $1, $1, 24
; O3-NEXT: sra $1, $1, 24
; O3-NEXT: # %bb.3: # %entry
; O3-NEXT: sll $1, $1, 24
; O3-NEXT: jr $ra
; O3-NEXT: sra $2, $1, 24
;
; MIPS32EB-LABEL: AtomicLoadSub8:
; MIPS32EB: # %bb.0: # %entry
; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32EB-NEXT: addu $1, $2, $25
; MIPS32EB-NEXT: lw $1, %got(y)($1)
; MIPS32EB-NEXT: addiu $2, $zero, -4
; MIPS32EB-NEXT: and $2, $1, $2
; MIPS32EB-NEXT: andi $1, $1, 3
; MIPS32EB-NEXT: xori $1, $1, 3
; MIPS32EB-NEXT: sll $3, $1, 3
; MIPS32EB-NEXT: ori $1, $zero, 255
; MIPS32EB-NEXT: sllv $5, $1, $3
; MIPS32EB-NEXT: nor $6, $zero, $5
; MIPS32EB-NEXT: sllv $4, $4, $3
; MIPS32EB-NEXT: $BB9_1: # %entry
; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32EB-NEXT: ll $7, 0($2)
; MIPS32EB-NEXT: subu $8, $7, $4
; MIPS32EB-NEXT: and $8, $8, $5
; MIPS32EB-NEXT: and $9, $7, $6
; MIPS32EB-NEXT: or $9, $9, $8
; MIPS32EB-NEXT: sc $9, 0($2)
; MIPS32EB-NEXT: beqz $9, $BB9_1
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: # %bb.2: # %entry
; MIPS32EB-NEXT: and $1, $7, $5
; MIPS32EB-NEXT: srlv $1, $1, $3
; MIPS32EB-NEXT: sll $1, $1, 24
; MIPS32EB-NEXT: sra $1, $1, 24
; MIPS32EB-NEXT: # %bb.3: # %entry
; MIPS32EB-NEXT: sll $1, $1, 24
; MIPS32EB-NEXT: jr $ra
; MIPS32EB-NEXT: sra $2, $1, 24
entry:
%0 = atomicrmw sub i8* @y, i8 %incr monotonic
ret i8 %0
}
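; AtomicLoadNand8: nand has no single MIPS instruction, so inside the ll/sc
; loop the loaded word is and'ed with the shifted operand and then inverted
; with nor against $zero before being merged back under the byte mask.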
define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind {
; MIPS32-LABEL: AtomicLoadNand8:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-NEXT: addu $1, $2, $25
; MIPS32-NEXT: lw $1, %got(y)($1)
; MIPS32-NEXT: addiu $2, $zero, -4
; MIPS32-NEXT: and $2, $1, $2
; MIPS32-NEXT: andi $1, $1, 3
; MIPS32-NEXT: sll $3, $1, 3
; MIPS32-NEXT: ori $1, $zero, 255
; MIPS32-NEXT: sllv $5, $1, $3
; MIPS32-NEXT: nor $6, $zero, $5
; MIPS32-NEXT: sllv $4, $4, $3
; MIPS32-NEXT: $BB10_1: # %entry
; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32-NEXT: ll $7, 0($2)
; MIPS32-NEXT: and $8, $7, $4
; MIPS32-NEXT: nor $8, $zero, $8
; MIPS32-NEXT: and $8, $8, $5
; MIPS32-NEXT: and $9, $7, $6
; MIPS32-NEXT: or $9, $9, $8
; MIPS32-NEXT: sc $9, 0($2)
; MIPS32-NEXT: beqz $9, $BB10_1
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %entry
; MIPS32-NEXT: and $1, $7, $5
; MIPS32-NEXT: srlv $1, $1, $3
; MIPS32-NEXT: sll $1, $1, 24
; MIPS32-NEXT: sra $1, $1, 24
; MIPS32-NEXT: # %bb.3: # %entry
; MIPS32-NEXT: sll $1, $1, 24
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: sra $2, $1, 24
;
; MIPS32O0-LABEL: AtomicLoadNand8:
; MIPS32O0: # %bb.0: # %entry
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addiu $sp, $sp, -8
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: lw $1, %got(y)($1)
; MIPS32O0-NEXT: addiu $2, $zero, -4
; MIPS32O0-NEXT: and $2, $1, $2
; MIPS32O0-NEXT: andi $1, $1, 3
; MIPS32O0-NEXT: sll $1, $1, 3
; MIPS32O0-NEXT: ori $3, $zero, 255
; MIPS32O0-NEXT: sllv $3, $3, $1
; MIPS32O0-NEXT: nor $5, $zero, $3
; MIPS32O0-NEXT: sllv $4, $4, $1
; MIPS32O0-NEXT: $BB10_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32O0-NEXT: ll $7, 0($2)
; MIPS32O0-NEXT: and $8, $7, $4
; MIPS32O0-NEXT: nor $8, $zero, $8
; MIPS32O0-NEXT: and $8, $8, $3
; MIPS32O0-NEXT: and $9, $7, $5
; MIPS32O0-NEXT: or $9, $9, $8
; MIPS32O0-NEXT: sc $9, 0($2)
; MIPS32O0-NEXT: beqz $9, $BB10_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: and $6, $7, $3
; MIPS32O0-NEXT: srlv $6, $6, $1
; MIPS32O0-NEXT: sll $6, $6, 24
; MIPS32O0-NEXT: sra $6, $6, 24
; MIPS32O0-NEXT: # %bb.3: # %entry
; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: # %bb.4: # %entry
; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32O0-NEXT: sll $2, $1, 24
; MIPS32O0-NEXT: sra $2, $2, 24
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
; MIPS32R2-LABEL: AtomicLoadNand8:
; MIPS32R2: # %bb.0: # %entry
; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R2-NEXT: addu $1, $2, $25
; MIPS32R2-NEXT: lw $1, %got(y)($1)
; MIPS32R2-NEXT: addiu $2, $zero, -4
; MIPS32R2-NEXT: and $2, $1, $2
; MIPS32R2-NEXT: andi $1, $1, 3
; MIPS32R2-NEXT: sll $3, $1, 3
; MIPS32R2-NEXT: ori $1, $zero, 255
; MIPS32R2-NEXT: sllv $5, $1, $3
; MIPS32R2-NEXT: nor $6, $zero, $5
; MIPS32R2-NEXT: sllv $4, $4, $3
; MIPS32R2-NEXT: $BB10_1: # %entry
; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R2-NEXT: ll $7, 0($2)
; MIPS32R2-NEXT: and $8, $7, $4
; MIPS32R2-NEXT: nor $8, $zero, $8
; MIPS32R2-NEXT: and $8, $8, $5
; MIPS32R2-NEXT: and $9, $7, $6
; MIPS32R2-NEXT: or $9, $9, $8
; MIPS32R2-NEXT: sc $9, 0($2)
; MIPS32R2-NEXT: beqz $9, $BB10_1
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: # %bb.2: # %entry
; MIPS32R2-NEXT: and $1, $7, $5
; MIPS32R2-NEXT: srlv $1, $1, $3
; MIPS32R2-NEXT: seb $1, $1
; MIPS32R2-NEXT: # %bb.3: # %entry
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: seb $2, $1
;
; MIPS32R6-LABEL: AtomicLoadNand8:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-NEXT: addu $1, $2, $25
; MIPS32R6-NEXT: lw $1, %got(y)($1)
; MIPS32R6-NEXT: addiu $2, $zero, -4
; MIPS32R6-NEXT: and $2, $1, $2
; MIPS32R6-NEXT: andi $1, $1, 3
; MIPS32R6-NEXT: sll $3, $1, 3
; MIPS32R6-NEXT: ori $1, $zero, 255
; MIPS32R6-NEXT: sllv $5, $1, $3
; MIPS32R6-NEXT: nor $6, $zero, $5
; MIPS32R6-NEXT: sllv $4, $4, $3
; MIPS32R6-NEXT: $BB10_1: # %entry
; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6-NEXT: ll $7, 0($2)
; MIPS32R6-NEXT: and $8, $7, $4
; MIPS32R6-NEXT: nor $8, $zero, $8
; MIPS32R6-NEXT: and $8, $8, $5
; MIPS32R6-NEXT: and $9, $7, $6
; MIPS32R6-NEXT: or $9, $9, $8
; MIPS32R6-NEXT: sc $9, 0($2)
; MIPS32R6-NEXT: beqzc $9, $BB10_1
; MIPS32R6-NEXT: # %bb.2: # %entry
; MIPS32R6-NEXT: and $1, $7, $5
; MIPS32R6-NEXT: srlv $1, $1, $3
; MIPS32R6-NEXT: seb $1, $1
; MIPS32R6-NEXT: # %bb.3: # %entry
; MIPS32R6-NEXT: jr $ra
; MIPS32R6-NEXT: seb $2, $1
;
; MIPS32R6O0-LABEL: AtomicLoadNand8:
; MIPS32R6O0: # %bb.0: # %entry
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: move $2, $4
; MIPS32R6O0-NEXT: lw $1, %got(y)($1)
; MIPS32R6O0-NEXT: addiu $3, $zero, -4
; MIPS32R6O0-NEXT: and $3, $1, $3
; MIPS32R6O0-NEXT: andi $1, $1, 3
; MIPS32R6O0-NEXT: sll $1, $1, 3
; MIPS32R6O0-NEXT: ori $5, $zero, 255
; MIPS32R6O0-NEXT: sllv $5, $5, $1
; MIPS32R6O0-NEXT: nor $6, $zero, $5
; MIPS32R6O0-NEXT: sllv $4, $4, $1
; MIPS32R6O0-NEXT: $BB10_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6O0-NEXT: ll $8, 0($3)
; MIPS32R6O0-NEXT: and $9, $8, $4
; MIPS32R6O0-NEXT: nor $9, $zero, $9
; MIPS32R6O0-NEXT: and $9, $9, $5
; MIPS32R6O0-NEXT: and $10, $8, $6
; MIPS32R6O0-NEXT: or $10, $10, $9
; MIPS32R6O0-NEXT: sc $10, 0($3)
; MIPS32R6O0-NEXT: beqzc $10, $BB10_1
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: and $7, $8, $5
; MIPS32R6O0-NEXT: srlv $7, $7, $1
; MIPS32R6O0-NEXT: seb $7, $7
; MIPS32R6O0-NEXT: # %bb.3: # %entry
; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.4: # %entry
; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: seb $2, $1
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: AtomicLoadNand8:
; MIPS4: # %bb.0: # %entry
; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8)))
; MIPS4-NEXT: daddu $1, $1, $25
; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8)))
; MIPS4-NEXT: ld $1, %got_disp(y)($1)
; MIPS4-NEXT: daddiu $2, $zero, -4
; MIPS4-NEXT: and $2, $1, $2
; MIPS4-NEXT: andi $1, $1, 3
; MIPS4-NEXT: sll $3, $1, 3
; MIPS4-NEXT: ori $1, $zero, 255
; MIPS4-NEXT: sllv $5, $1, $3
; MIPS4-NEXT: nor $6, $zero, $5
; MIPS4-NEXT: sllv $4, $4, $3
; MIPS4-NEXT: .LBB10_1: # %entry
; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS4-NEXT: ll $7, 0($2)
; MIPS4-NEXT: and $8, $7, $4
; MIPS4-NEXT: nor $8, $zero, $8
; MIPS4-NEXT: and $8, $8, $5
; MIPS4-NEXT: and $9, $7, $6
; MIPS4-NEXT: or $9, $9, $8
; MIPS4-NEXT: sc $9, 0($2)
; MIPS4-NEXT: beqz $9, .LBB10_1
; MIPS4-NEXT: nop
; MIPS4-NEXT: # %bb.2: # %entry
; MIPS4-NEXT: and $1, $7, $5
; MIPS4-NEXT: srlv $1, $1, $3
; MIPS4-NEXT: sll $1, $1, 24
; MIPS4-NEXT: sra $1, $1, 24
; MIPS4-NEXT: # %bb.3: # %entry
; MIPS4-NEXT: sll $1, $1, 24
; MIPS4-NEXT: jr $ra
; MIPS4-NEXT: sra $2, $1, 24
;
; MIPS64-LABEL: AtomicLoadNand8:
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8)))
; MIPS64-NEXT: daddu $1, $1, $25
; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8)))
; MIPS64-NEXT: ld $1, %got_disp(y)($1)
; MIPS64-NEXT: daddiu $2, $zero, -4
; MIPS64-NEXT: and $2, $1, $2
; MIPS64-NEXT: andi $1, $1, 3
; MIPS64-NEXT: sll $3, $1, 3
; MIPS64-NEXT: ori $1, $zero, 255
; MIPS64-NEXT: sllv $5, $1, $3
; MIPS64-NEXT: nor $6, $zero, $5
; MIPS64-NEXT: sllv $4, $4, $3
; MIPS64-NEXT: .LBB10_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64-NEXT: ll $7, 0($2)
; MIPS64-NEXT: and $8, $7, $4
; MIPS64-NEXT: nor $8, $zero, $8
; MIPS64-NEXT: and $8, $8, $5
; MIPS64-NEXT: and $9, $7, $6
; MIPS64-NEXT: or $9, $9, $8
; MIPS64-NEXT: sc $9, 0($2)
; MIPS64-NEXT: beqz $9, .LBB10_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
; MIPS64-NEXT: and $1, $7, $5
; MIPS64-NEXT: srlv $1, $1, $3
; MIPS64-NEXT: sll $1, $1, 24
; MIPS64-NEXT: sra $1, $1, 24
; MIPS64-NEXT: # %bb.3: # %entry
; MIPS64-NEXT: sll $1, $1, 24
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: sra $2, $1, 24
;
; MIPS64R2-LABEL: AtomicLoadNand8:
; MIPS64R2: # %bb.0: # %entry
; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8)))
; MIPS64R2-NEXT: daddu $1, $1, $25
; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8)))
; MIPS64R2-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R2-NEXT: daddiu $2, $zero, -4
; MIPS64R2-NEXT: and $2, $1, $2
; MIPS64R2-NEXT: andi $1, $1, 3
; MIPS64R2-NEXT: sll $3, $1, 3
; MIPS64R2-NEXT: ori $1, $zero, 255
; MIPS64R2-NEXT: sllv $5, $1, $3
; MIPS64R2-NEXT: nor $6, $zero, $5
; MIPS64R2-NEXT: sllv $4, $4, $3
; MIPS64R2-NEXT: .LBB10_1: # %entry
; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R2-NEXT: ll $7, 0($2)
; MIPS64R2-NEXT: and $8, $7, $4
; MIPS64R2-NEXT: nor $8, $zero, $8
; MIPS64R2-NEXT: and $8, $8, $5
; MIPS64R2-NEXT: and $9, $7, $6
; MIPS64R2-NEXT: or $9, $9, $8
; MIPS64R2-NEXT: sc $9, 0($2)
; MIPS64R2-NEXT: beqz $9, .LBB10_1
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: # %bb.2: # %entry
; MIPS64R2-NEXT: and $1, $7, $5
; MIPS64R2-NEXT: srlv $1, $1, $3
; MIPS64R2-NEXT: seb $1, $1
; MIPS64R2-NEXT: # %bb.3: # %entry
; MIPS64R2-NEXT: jr $ra
; MIPS64R2-NEXT: seb $2, $1
;
; MIPS64R6-LABEL: AtomicLoadNand8:
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8)))
; MIPS64R6-NEXT: daddu $1, $1, $25
; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8)))
; MIPS64R6-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R6-NEXT: daddiu $2, $zero, -4
; MIPS64R6-NEXT: and $2, $1, $2
; MIPS64R6-NEXT: andi $1, $1, 3
; MIPS64R6-NEXT: sll $3, $1, 3
; MIPS64R6-NEXT: ori $1, $zero, 255
; MIPS64R6-NEXT: sllv $5, $1, $3
; MIPS64R6-NEXT: nor $6, $zero, $5
; MIPS64R6-NEXT: sllv $4, $4, $3
; MIPS64R6-NEXT: .LBB10_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6-NEXT: ll $7, 0($2)
; MIPS64R6-NEXT: and $8, $7, $4
; MIPS64R6-NEXT: nor $8, $zero, $8
; MIPS64R6-NEXT: and $8, $8, $5
; MIPS64R6-NEXT: and $9, $7, $6
; MIPS64R6-NEXT: or $9, $9, $8
; MIPS64R6-NEXT: sc $9, 0($2)
; MIPS64R6-NEXT: beqzc $9, .LBB10_1
; MIPS64R6-NEXT: # %bb.2: # %entry
; MIPS64R6-NEXT: and $1, $7, $5
; MIPS64R6-NEXT: srlv $1, $1, $3
; MIPS64R6-NEXT: seb $1, $1
; MIPS64R6-NEXT: # %bb.3: # %entry
; MIPS64R6-NEXT: jr $ra
; MIPS64R6-NEXT: seb $2, $1
;
; MIPS64R6O0-LABEL: AtomicLoadNand8:
; MIPS64R6O0: # %bb.0: # %entry
; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
; MIPS64R6O0-NEXT: and $2, $1, $2
; MIPS64R6O0-NEXT: andi $3, $1, 3
; MIPS64R6O0-NEXT: xori $3, $3, 3
; MIPS64R6O0-NEXT: sll $3, $3, 3
; MIPS64R6O0-NEXT: ori $5, $zero, 255
; MIPS64R6O0-NEXT: sllv $5, $5, $3
; MIPS64R6O0-NEXT: nor $6, $zero, $5
; MIPS64R6O0-NEXT: sllv $4, $4, $3
; MIPS64R6O0-NEXT: .LBB10_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6O0-NEXT: ll $8, 0($2)
; MIPS64R6O0-NEXT: and $9, $8, $4
; MIPS64R6O0-NEXT: nor $9, $zero, $9
; MIPS64R6O0-NEXT: and $9, $9, $5
; MIPS64R6O0-NEXT: and $10, $8, $6
; MIPS64R6O0-NEXT: or $10, $10, $9
; MIPS64R6O0-NEXT: sc $10, 0($2)
; MIPS64R6O0-NEXT: beqzc $10, .LBB10_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: and $7, $8, $5
; MIPS64R6O0-NEXT: srlv $7, $7, $3
; MIPS64R6O0-NEXT: seb $7, $7
; MIPS64R6O0-NEXT: # %bb.3: # %entry
; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seb $2, $1
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: AtomicLoadNand8:
; MM32: # %bb.0: # %entry
; MM32-NEXT: lui $2, %hi(_gp_disp)
; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MM32-NEXT: addu $2, $2, $25
; MM32-NEXT: lw $1, %got(y)($2)
; MM32-NEXT: addiu $2, $zero, -4
; MM32-NEXT: and $2, $1, $2
; MM32-NEXT: andi $1, $1, 3
; MM32-NEXT: sll $3, $1, 3
; MM32-NEXT: ori $1, $zero, 255
; MM32-NEXT: sllv $5, $1, $3
; MM32-NEXT: nor $6, $zero, $5
; MM32-NEXT: sllv $4, $4, $3
; MM32-NEXT: $BB10_1: # %entry
; MM32-NEXT: # =>This Inner Loop Header: Depth=1
; MM32-NEXT: ll $7, 0($2)
; MM32-NEXT: and $8, $7, $4
; MM32-NEXT: nor $8, $zero, $8
; MM32-NEXT: and $8, $8, $5
; MM32-NEXT: and $9, $7, $6
; MM32-NEXT: or $9, $9, $8
; MM32-NEXT: sc $9, 0($2)
; MM32-NEXT: beqzc $9, $BB10_1
; MM32-NEXT: # %bb.2: # %entry
; MM32-NEXT: and $1, $7, $5
; MM32-NEXT: srlv $1, $1, $3
; MM32-NEXT: seb $1, $1
; MM32-NEXT: # %bb.3: # %entry
; MM32-NEXT: jr $ra
; MM32-NEXT: seb $2, $1
;
; O1-LABEL: AtomicLoadNand8:
; O1: # %bb.0: # %entry
; O1-NEXT: lui $2, %hi(_gp_disp)
; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
; O1-NEXT: addu $1, $2, $25
; O1-NEXT: lw $1, %got(y)($1)
; O1-NEXT: addiu $2, $zero, -4
; O1-NEXT: and $2, $1, $2
; O1-NEXT: andi $1, $1, 3
; O1-NEXT: sll $3, $1, 3
; O1-NEXT: ori $1, $zero, 255
; O1-NEXT: sllv $5, $1, $3
; O1-NEXT: nor $6, $zero, $5
; O1-NEXT: sllv $4, $4, $3
; O1-NEXT: $BB10_1: # %entry
; O1-NEXT: # =>This Inner Loop Header: Depth=1
; O1-NEXT: ll $7, 0($2)
; O1-NEXT: and $8, $7, $4
; O1-NEXT: nor $8, $zero, $8
; O1-NEXT: and $8, $8, $5
; O1-NEXT: and $9, $7, $6
; O1-NEXT: or $9, $9, $8
; O1-NEXT: sc $9, 0($2)
; O1-NEXT: beqz $9, $BB10_1
; O1-NEXT: nop
; O1-NEXT: # %bb.2: # %entry
; O1-NEXT: and $1, $7, $5
; O1-NEXT: srlv $1, $1, $3
; O1-NEXT: sll $1, $1, 24
; O1-NEXT: sra $1, $1, 24
; O1-NEXT: # %bb.3: # %entry
; O1-NEXT: sll $1, $1, 24
; O1-NEXT: jr $ra
; O1-NEXT: sra $2, $1, 24
;
; O2-LABEL: AtomicLoadNand8:
; O2: # %bb.0: # %entry
; O2-NEXT: lui $2, %hi(_gp_disp)
; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
; O2-NEXT: addu $1, $2, $25
; O2-NEXT: lw $1, %got(y)($1)
; O2-NEXT: addiu $2, $zero, -4
; O2-NEXT: and $2, $1, $2
; O2-NEXT: andi $1, $1, 3
; O2-NEXT: sll $3, $1, 3
; O2-NEXT: ori $1, $zero, 255
; O2-NEXT: sllv $5, $1, $3
; O2-NEXT: nor $6, $zero, $5
; O2-NEXT: sllv $4, $4, $3
; O2-NEXT: $BB10_1: # %entry
; O2-NEXT: # =>This Inner Loop Header: Depth=1
; O2-NEXT: ll $7, 0($2)
; O2-NEXT: and $8, $7, $4
; O2-NEXT: nor $8, $zero, $8
; O2-NEXT: and $8, $8, $5
; O2-NEXT: and $9, $7, $6
; O2-NEXT: or $9, $9, $8
; O2-NEXT: sc $9, 0($2)
; O2-NEXT: beqz $9, $BB10_1
; O2-NEXT: nop
; O2-NEXT: # %bb.2: # %entry
; O2-NEXT: and $1, $7, $5
; O2-NEXT: srlv $1, $1, $3
; O2-NEXT: sll $1, $1, 24
; O2-NEXT: sra $1, $1, 24
; O2-NEXT: # %bb.3: # %entry
; O2-NEXT: sll $1, $1, 24
; O2-NEXT: jr $ra
; O2-NEXT: sra $2, $1, 24
;
; O3-LABEL: AtomicLoadNand8:
; O3: # %bb.0: # %entry
; O3-NEXT: lui $2, %hi(_gp_disp)
; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
; O3-NEXT: addu $1, $2, $25
; O3-NEXT: addiu $2, $zero, -4
; O3-NEXT: lw $1, %got(y)($1)
; O3-NEXT: and $2, $1, $2
; O3-NEXT: andi $1, $1, 3
; O3-NEXT: sll $3, $1, 3
; O3-NEXT: ori $1, $zero, 255
; O3-NEXT: sllv $5, $1, $3
; O3-NEXT: sllv $4, $4, $3
; O3-NEXT: nor $6, $zero, $5
; O3-NEXT: $BB10_1: # %entry
; O3-NEXT: # =>This Inner Loop Header: Depth=1
; O3-NEXT: ll $7, 0($2)
; O3-NEXT: and $8, $7, $4
; O3-NEXT: nor $8, $zero, $8
; O3-NEXT: and $8, $8, $5
; O3-NEXT: and $9, $7, $6
; O3-NEXT: or $9, $9, $8
; O3-NEXT: sc $9, 0($2)
; O3-NEXT: beqz $9, $BB10_1
; O3-NEXT: nop
; O3-NEXT: # %bb.2: # %entry
; O3-NEXT: and $1, $7, $5
; O3-NEXT: srlv $1, $1, $3
; O3-NEXT: sll $1, $1, 24
; O3-NEXT: sra $1, $1, 24
; O3-NEXT: # %bb.3: # %entry
; O3-NEXT: sll $1, $1, 24
; O3-NEXT: jr $ra
; O3-NEXT: sra $2, $1, 24
;
; MIPS32EB-LABEL: AtomicLoadNand8:
; MIPS32EB: # %bb.0: # %entry
; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32EB-NEXT: addu $1, $2, $25
; MIPS32EB-NEXT: lw $1, %got(y)($1)
; MIPS32EB-NEXT: addiu $2, $zero, -4
; MIPS32EB-NEXT: and $2, $1, $2
; MIPS32EB-NEXT: andi $1, $1, 3
; MIPS32EB-NEXT: xori $1, $1, 3
; MIPS32EB-NEXT: sll $3, $1, 3
; MIPS32EB-NEXT: ori $1, $zero, 255
; MIPS32EB-NEXT: sllv $5, $1, $3
; MIPS32EB-NEXT: nor $6, $zero, $5
; MIPS32EB-NEXT: sllv $4, $4, $3
; MIPS32EB-NEXT: $BB10_1: # %entry
; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32EB-NEXT: ll $7, 0($2)
; MIPS32EB-NEXT: and $8, $7, $4
; MIPS32EB-NEXT: nor $8, $zero, $8
; MIPS32EB-NEXT: and $8, $8, $5
; MIPS32EB-NEXT: and $9, $7, $6
; MIPS32EB-NEXT: or $9, $9, $8
; MIPS32EB-NEXT: sc $9, 0($2)
; MIPS32EB-NEXT: beqz $9, $BB10_1
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: # %bb.2: # %entry
; MIPS32EB-NEXT: and $1, $7, $5
; MIPS32EB-NEXT: srlv $1, $1, $3
; MIPS32EB-NEXT: sll $1, $1, 24
; MIPS32EB-NEXT: sra $1, $1, 24
; MIPS32EB-NEXT: # %bb.3: # %entry
; MIPS32EB-NEXT: sll $1, $1, 24
; MIPS32EB-NEXT: jr $ra
; MIPS32EB-NEXT: sra $2, $1, 24
entry:
%0 = atomicrmw nand i8* @y, i8 %incr monotonic
ret i8 %0
}
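; AtomicSwap8 (atomicrmw xchg) needs no ALU step in the loop body: the
; shifted new value is masked and or'd straight into the loaded word.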
define signext i8 @AtomicSwap8(i8 signext %newval) nounwind {
; MIPS32-LABEL: AtomicSwap8:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-NEXT: addu $1, $2, $25
; MIPS32-NEXT: lw $1, %got(y)($1)
; MIPS32-NEXT: addiu $2, $zero, -4
; MIPS32-NEXT: and $2, $1, $2
; MIPS32-NEXT: andi $1, $1, 3
; MIPS32-NEXT: sll $3, $1, 3
; MIPS32-NEXT: ori $1, $zero, 255
; MIPS32-NEXT: sllv $5, $1, $3
; MIPS32-NEXT: nor $6, $zero, $5
; MIPS32-NEXT: sllv $4, $4, $3
; MIPS32-NEXT: $BB11_1: # %entry
; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32-NEXT: ll $7, 0($2)
; MIPS32-NEXT: and $8, $4, $5
; MIPS32-NEXT: and $9, $7, $6
; MIPS32-NEXT: or $9, $9, $8
; MIPS32-NEXT: sc $9, 0($2)
; MIPS32-NEXT: beqz $9, $BB11_1
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %entry
; MIPS32-NEXT: and $1, $7, $5
; MIPS32-NEXT: srlv $1, $1, $3
; MIPS32-NEXT: sll $1, $1, 24
; MIPS32-NEXT: sra $1, $1, 24
; MIPS32-NEXT: # %bb.3: # %entry
; MIPS32-NEXT: sll $1, $1, 24
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: sra $2, $1, 24
;
; MIPS32O0-LABEL: AtomicSwap8:
; MIPS32O0: # %bb.0: # %entry
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addiu $sp, $sp, -8
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: lw $1, %got(y)($1)
; MIPS32O0-NEXT: addiu $2, $zero, -4
; MIPS32O0-NEXT: and $2, $1, $2
; MIPS32O0-NEXT: andi $1, $1, 3
; MIPS32O0-NEXT: sll $1, $1, 3
; MIPS32O0-NEXT: ori $3, $zero, 255
; MIPS32O0-NEXT: sllv $3, $3, $1
; MIPS32O0-NEXT: nor $5, $zero, $3
; MIPS32O0-NEXT: sllv $4, $4, $1
; MIPS32O0-NEXT: $BB11_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32O0-NEXT: ll $7, 0($2)
; MIPS32O0-NEXT: and $8, $4, $3
; MIPS32O0-NEXT: and $9, $7, $5
; MIPS32O0-NEXT: or $9, $9, $8
; MIPS32O0-NEXT: sc $9, 0($2)
; MIPS32O0-NEXT: beqz $9, $BB11_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: and $6, $7, $3
; MIPS32O0-NEXT: srlv $6, $6, $1
; MIPS32O0-NEXT: sll $6, $6, 24
; MIPS32O0-NEXT: sra $6, $6, 24
; MIPS32O0-NEXT: # %bb.3: # %entry
; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: # %bb.4: # %entry
; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32O0-NEXT: sll $2, $1, 24
; MIPS32O0-NEXT: sra $2, $2, 24
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
; MIPS32R2-LABEL: AtomicSwap8:
; MIPS32R2: # %bb.0: # %entry
; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R2-NEXT: addu $1, $2, $25
; MIPS32R2-NEXT: lw $1, %got(y)($1)
; MIPS32R2-NEXT: addiu $2, $zero, -4
; MIPS32R2-NEXT: and $2, $1, $2
; MIPS32R2-NEXT: andi $1, $1, 3
; MIPS32R2-NEXT: sll $3, $1, 3
; MIPS32R2-NEXT: ori $1, $zero, 255
; MIPS32R2-NEXT: sllv $5, $1, $3
; MIPS32R2-NEXT: nor $6, $zero, $5
; MIPS32R2-NEXT: sllv $4, $4, $3
; MIPS32R2-NEXT: $BB11_1: # %entry
; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R2-NEXT: ll $7, 0($2)
; MIPS32R2-NEXT: and $8, $4, $5
; MIPS32R2-NEXT: and $9, $7, $6
; MIPS32R2-NEXT: or $9, $9, $8
; MIPS32R2-NEXT: sc $9, 0($2)
; MIPS32R2-NEXT: beqz $9, $BB11_1
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: # %bb.2: # %entry
; MIPS32R2-NEXT: and $1, $7, $5
; MIPS32R2-NEXT: srlv $1, $1, $3
; MIPS32R2-NEXT: seb $1, $1
; MIPS32R2-NEXT: # %bb.3: # %entry
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: seb $2, $1
;
; MIPS32R6-LABEL: AtomicSwap8:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-NEXT: addu $1, $2, $25
; MIPS32R6-NEXT: lw $1, %got(y)($1)
; MIPS32R6-NEXT: addiu $2, $zero, -4
; MIPS32R6-NEXT: and $2, $1, $2
; MIPS32R6-NEXT: andi $1, $1, 3
; MIPS32R6-NEXT: sll $3, $1, 3
; MIPS32R6-NEXT: ori $1, $zero, 255
; MIPS32R6-NEXT: sllv $5, $1, $3
; MIPS32R6-NEXT: nor $6, $zero, $5
; MIPS32R6-NEXT: sllv $4, $4, $3
; MIPS32R6-NEXT: $BB11_1: # %entry
; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6-NEXT: ll $7, 0($2)
; MIPS32R6-NEXT: and $8, $4, $5
; MIPS32R6-NEXT: and $9, $7, $6
; MIPS32R6-NEXT: or $9, $9, $8
; MIPS32R6-NEXT: sc $9, 0($2)
; MIPS32R6-NEXT: beqzc $9, $BB11_1
; MIPS32R6-NEXT: # %bb.2: # %entry
; MIPS32R6-NEXT: and $1, $7, $5
; MIPS32R6-NEXT: srlv $1, $1, $3
; MIPS32R6-NEXT: seb $1, $1
; MIPS32R6-NEXT: # %bb.3: # %entry
; MIPS32R6-NEXT: jr $ra
; MIPS32R6-NEXT: seb $2, $1
;
; MIPS32R6O0-LABEL: AtomicSwap8:
; MIPS32R6O0: # %bb.0: # %entry
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: move $2, $4
; MIPS32R6O0-NEXT: lw $1, %got(y)($1)
; MIPS32R6O0-NEXT: addiu $3, $zero, -4
; MIPS32R6O0-NEXT: and $3, $1, $3
; MIPS32R6O0-NEXT: andi $1, $1, 3
; MIPS32R6O0-NEXT: sll $1, $1, 3
; MIPS32R6O0-NEXT: ori $5, $zero, 255
; MIPS32R6O0-NEXT: sllv $5, $5, $1
; MIPS32R6O0-NEXT: nor $6, $zero, $5
; MIPS32R6O0-NEXT: sllv $4, $4, $1
; MIPS32R6O0-NEXT: $BB11_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6O0-NEXT: ll $8, 0($3)
; MIPS32R6O0-NEXT: and $9, $4, $5
; MIPS32R6O0-NEXT: and $10, $8, $6
; MIPS32R6O0-NEXT: or $10, $10, $9
; MIPS32R6O0-NEXT: sc $10, 0($3)
; MIPS32R6O0-NEXT: beqzc $10, $BB11_1
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: and $7, $8, $5
; MIPS32R6O0-NEXT: srlv $7, $7, $1
; MIPS32R6O0-NEXT: seb $7, $7
; MIPS32R6O0-NEXT: # %bb.3: # %entry
; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.4: # %entry
; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: seb $2, $1
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: AtomicSwap8:
; MIPS4: # %bb.0: # %entry
; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8)))
; MIPS4-NEXT: daddu $1, $1, $25
; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8)))
; MIPS4-NEXT: ld $1, %got_disp(y)($1)
; MIPS4-NEXT: daddiu $2, $zero, -4
; MIPS4-NEXT: and $2, $1, $2
; MIPS4-NEXT: andi $1, $1, 3
; MIPS4-NEXT: sll $3, $1, 3
; MIPS4-NEXT: ori $1, $zero, 255
; MIPS4-NEXT: sllv $5, $1, $3
; MIPS4-NEXT: nor $6, $zero, $5
; MIPS4-NEXT: sllv $4, $4, $3
; MIPS4-NEXT: .LBB11_1: # %entry
; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS4-NEXT: ll $7, 0($2)
; MIPS4-NEXT: and $8, $4, $5
; MIPS4-NEXT: and $9, $7, $6
; MIPS4-NEXT: or $9, $9, $8
; MIPS4-NEXT: sc $9, 0($2)
; MIPS4-NEXT: beqz $9, .LBB11_1
; MIPS4-NEXT: nop
; MIPS4-NEXT: # %bb.2: # %entry
; MIPS4-NEXT: and $1, $7, $5
; MIPS4-NEXT: srlv $1, $1, $3
; MIPS4-NEXT: sll $1, $1, 24
; MIPS4-NEXT: sra $1, $1, 24
; MIPS4-NEXT: # %bb.3: # %entry
; MIPS4-NEXT: sll $1, $1, 24
; MIPS4-NEXT: jr $ra
; MIPS4-NEXT: sra $2, $1, 24
;
; MIPS64-LABEL: AtomicSwap8:
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8)))
; MIPS64-NEXT: daddu $1, $1, $25
; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8)))
; MIPS64-NEXT: ld $1, %got_disp(y)($1)
; MIPS64-NEXT: daddiu $2, $zero, -4
; MIPS64-NEXT: and $2, $1, $2
; MIPS64-NEXT: andi $1, $1, 3
; MIPS64-NEXT: sll $3, $1, 3
; MIPS64-NEXT: ori $1, $zero, 255
; MIPS64-NEXT: sllv $5, $1, $3
; MIPS64-NEXT: nor $6, $zero, $5
; MIPS64-NEXT: sllv $4, $4, $3
; MIPS64-NEXT: .LBB11_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64-NEXT: ll $7, 0($2)
; MIPS64-NEXT: and $8, $4, $5
; MIPS64-NEXT: and $9, $7, $6
; MIPS64-NEXT: or $9, $9, $8
; MIPS64-NEXT: sc $9, 0($2)
; MIPS64-NEXT: beqz $9, .LBB11_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
; MIPS64-NEXT: and $1, $7, $5
; MIPS64-NEXT: srlv $1, $1, $3
; MIPS64-NEXT: sll $1, $1, 24
; MIPS64-NEXT: sra $1, $1, 24
; MIPS64-NEXT: # %bb.3: # %entry
; MIPS64-NEXT: sll $1, $1, 24
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: sra $2, $1, 24
;
; MIPS64R2-LABEL: AtomicSwap8:
; MIPS64R2: # %bb.0: # %entry
; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8)))
; MIPS64R2-NEXT: daddu $1, $1, $25
; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8)))
; MIPS64R2-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R2-NEXT: daddiu $2, $zero, -4
; MIPS64R2-NEXT: and $2, $1, $2
; MIPS64R2-NEXT: andi $1, $1, 3
; MIPS64R2-NEXT: sll $3, $1, 3
; MIPS64R2-NEXT: ori $1, $zero, 255
; MIPS64R2-NEXT: sllv $5, $1, $3
; MIPS64R2-NEXT: nor $6, $zero, $5
; MIPS64R2-NEXT: sllv $4, $4, $3
; MIPS64R2-NEXT: .LBB11_1: # %entry
; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R2-NEXT: ll $7, 0($2)
; MIPS64R2-NEXT: and $8, $4, $5
; MIPS64R2-NEXT: and $9, $7, $6
; MIPS64R2-NEXT: or $9, $9, $8
; MIPS64R2-NEXT: sc $9, 0($2)
; MIPS64R2-NEXT: beqz $9, .LBB11_1
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: # %bb.2: # %entry
; MIPS64R2-NEXT: and $1, $7, $5
; MIPS64R2-NEXT: srlv $1, $1, $3
; MIPS64R2-NEXT: seb $1, $1
; MIPS64R2-NEXT: # %bb.3: # %entry
; MIPS64R2-NEXT: jr $ra
; MIPS64R2-NEXT: seb $2, $1
;
; MIPS64R6-LABEL: AtomicSwap8:
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8)))
; MIPS64R6-NEXT: daddu $1, $1, $25
; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8)))
; MIPS64R6-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R6-NEXT: daddiu $2, $zero, -4
; MIPS64R6-NEXT: and $2, $1, $2
; MIPS64R6-NEXT: andi $1, $1, 3
; MIPS64R6-NEXT: sll $3, $1, 3
; MIPS64R6-NEXT: ori $1, $zero, 255
; MIPS64R6-NEXT: sllv $5, $1, $3
; MIPS64R6-NEXT: nor $6, $zero, $5
; MIPS64R6-NEXT: sllv $4, $4, $3
; MIPS64R6-NEXT: .LBB11_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6-NEXT: ll $7, 0($2)
; MIPS64R6-NEXT: and $8, $4, $5
; MIPS64R6-NEXT: and $9, $7, $6
; MIPS64R6-NEXT: or $9, $9, $8
; MIPS64R6-NEXT: sc $9, 0($2)
; MIPS64R6-NEXT: beqzc $9, .LBB11_1
; MIPS64R6-NEXT: # %bb.2: # %entry
; MIPS64R6-NEXT: and $1, $7, $5
; MIPS64R6-NEXT: srlv $1, $1, $3
; MIPS64R6-NEXT: seb $1, $1
; MIPS64R6-NEXT: # %bb.3: # %entry
; MIPS64R6-NEXT: jr $ra
; MIPS64R6-NEXT: seb $2, $1
;
; MIPS64R6O0-LABEL: AtomicSwap8:
; MIPS64R6O0: # %bb.0: # %entry
; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
; MIPS64R6O0-NEXT: and $2, $1, $2
; MIPS64R6O0-NEXT: andi $3, $1, 3
; MIPS64R6O0-NEXT: xori $3, $3, 3
; MIPS64R6O0-NEXT: sll $3, $3, 3
; MIPS64R6O0-NEXT: ori $5, $zero, 255
; MIPS64R6O0-NEXT: sllv $5, $5, $3
; MIPS64R6O0-NEXT: nor $6, $zero, $5
; MIPS64R6O0-NEXT: sllv $4, $4, $3
; MIPS64R6O0-NEXT: .LBB11_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6O0-NEXT: ll $8, 0($2)
; MIPS64R6O0-NEXT: and $9, $4, $5
; MIPS64R6O0-NEXT: and $10, $8, $6
; MIPS64R6O0-NEXT: or $10, $10, $9
; MIPS64R6O0-NEXT: sc $10, 0($2)
; MIPS64R6O0-NEXT: beqzc $10, .LBB11_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: and $7, $8, $5
; MIPS64R6O0-NEXT: srlv $7, $7, $3
; MIPS64R6O0-NEXT: seb $7, $7
; MIPS64R6O0-NEXT: # %bb.3: # %entry
; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seb $2, $1
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: AtomicSwap8:
; MM32: # %bb.0: # %entry
; MM32-NEXT: lui $2, %hi(_gp_disp)
; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MM32-NEXT: addu $2, $2, $25
; MM32-NEXT: lw $1, %got(y)($2)
; MM32-NEXT: addiu $2, $zero, -4
; MM32-NEXT: and $2, $1, $2
; MM32-NEXT: andi $1, $1, 3
; MM32-NEXT: sll $3, $1, 3
; MM32-NEXT: ori $1, $zero, 255
; MM32-NEXT: sllv $5, $1, $3
; MM32-NEXT: nor $6, $zero, $5
; MM32-NEXT: sllv $4, $4, $3
; MM32-NEXT: $BB11_1: # %entry
; MM32-NEXT: # =>This Inner Loop Header: Depth=1
; MM32-NEXT: ll $7, 0($2)
; MM32-NEXT: and $8, $4, $5
; MM32-NEXT: and $9, $7, $6
; MM32-NEXT: or $9, $9, $8
; MM32-NEXT: sc $9, 0($2)
; MM32-NEXT: beqzc $9, $BB11_1
; MM32-NEXT: # %bb.2: # %entry
; MM32-NEXT: and $1, $7, $5
; MM32-NEXT: srlv $1, $1, $3
; MM32-NEXT: seb $1, $1
; MM32-NEXT: # %bb.3: # %entry
; MM32-NEXT: jr $ra
; MM32-NEXT: seb $2, $1
;
; O1-LABEL: AtomicSwap8:
; O1: # %bb.0: # %entry
; O1-NEXT: lui $2, %hi(_gp_disp)
; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
; O1-NEXT: addu $1, $2, $25
; O1-NEXT: lw $1, %got(y)($1)
; O1-NEXT: addiu $2, $zero, -4
; O1-NEXT: and $2, $1, $2
; O1-NEXT: andi $1, $1, 3
; O1-NEXT: sll $3, $1, 3
; O1-NEXT: ori $1, $zero, 255
; O1-NEXT: sllv $5, $1, $3
; O1-NEXT: nor $6, $zero, $5
; O1-NEXT: sllv $4, $4, $3
; O1-NEXT: $BB11_1: # %entry
; O1-NEXT: # =>This Inner Loop Header: Depth=1
; O1-NEXT: ll $7, 0($2)
; O1-NEXT: and $8, $4, $5
; O1-NEXT: and $9, $7, $6
; O1-NEXT: or $9, $9, $8
; O1-NEXT: sc $9, 0($2)
; O1-NEXT: beqz $9, $BB11_1
; O1-NEXT: nop
; O1-NEXT: # %bb.2: # %entry
; O1-NEXT: and $1, $7, $5
; O1-NEXT: srlv $1, $1, $3
; O1-NEXT: sll $1, $1, 24
; O1-NEXT: sra $1, $1, 24
; O1-NEXT: # %bb.3: # %entry
; O1-NEXT: sll $1, $1, 24
; O1-NEXT: jr $ra
; O1-NEXT: sra $2, $1, 24
;
; O2-LABEL: AtomicSwap8:
; O2: # %bb.0: # %entry
; O2-NEXT: lui $2, %hi(_gp_disp)
; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
; O2-NEXT: addu $1, $2, $25
; O2-NEXT: lw $1, %got(y)($1)
; O2-NEXT: addiu $2, $zero, -4
; O2-NEXT: and $2, $1, $2
; O2-NEXT: andi $1, $1, 3
; O2-NEXT: sll $3, $1, 3
; O2-NEXT: ori $1, $zero, 255
; O2-NEXT: sllv $5, $1, $3
; O2-NEXT: nor $6, $zero, $5
; O2-NEXT: sllv $4, $4, $3
; O2-NEXT: $BB11_1: # %entry
; O2-NEXT: # =>This Inner Loop Header: Depth=1
; O2-NEXT: ll $7, 0($2)
; O2-NEXT: and $8, $4, $5
; O2-NEXT: and $9, $7, $6
; O2-NEXT: or $9, $9, $8
; O2-NEXT: sc $9, 0($2)
; O2-NEXT: beqz $9, $BB11_1
; O2-NEXT: nop
; O2-NEXT: # %bb.2: # %entry
; O2-NEXT: and $1, $7, $5
; O2-NEXT: srlv $1, $1, $3
; O2-NEXT: sll $1, $1, 24
; O2-NEXT: sra $1, $1, 24
; O2-NEXT: # %bb.3: # %entry
; O2-NEXT: sll $1, $1, 24
; O2-NEXT: jr $ra
; O2-NEXT: sra $2, $1, 24
;
; O3-LABEL: AtomicSwap8:
; O3: # %bb.0: # %entry
; O3-NEXT: lui $2, %hi(_gp_disp)
; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
; O3-NEXT: addu $1, $2, $25
; O3-NEXT: addiu $2, $zero, -4
; O3-NEXT: lw $1, %got(y)($1)
; O3-NEXT: and $2, $1, $2
; O3-NEXT: andi $1, $1, 3
; O3-NEXT: sll $3, $1, 3
; O3-NEXT: ori $1, $zero, 255
; O3-NEXT: sllv $5, $1, $3
; O3-NEXT: sllv $4, $4, $3
; O3-NEXT: nor $6, $zero, $5
; O3-NEXT: $BB11_1: # %entry
; O3-NEXT: # =>This Inner Loop Header: Depth=1
; O3-NEXT: ll $7, 0($2)
; O3-NEXT: and $8, $4, $5
; O3-NEXT: and $9, $7, $6
; O3-NEXT: or $9, $9, $8
; O3-NEXT: sc $9, 0($2)
; O3-NEXT: beqz $9, $BB11_1
; O3-NEXT: nop
; O3-NEXT: # %bb.2: # %entry
; O3-NEXT: and $1, $7, $5
; O3-NEXT: srlv $1, $1, $3
; O3-NEXT: sll $1, $1, 24
; O3-NEXT: sra $1, $1, 24
; O3-NEXT: # %bb.3: # %entry
; O3-NEXT: sll $1, $1, 24
; O3-NEXT: jr $ra
; O3-NEXT: sra $2, $1, 24
;
; MIPS32EB-LABEL: AtomicSwap8:
; MIPS32EB: # %bb.0: # %entry
; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32EB-NEXT: addu $1, $2, $25
; MIPS32EB-NEXT: lw $1, %got(y)($1)
; MIPS32EB-NEXT: addiu $2, $zero, -4
; MIPS32EB-NEXT: and $2, $1, $2
; MIPS32EB-NEXT: andi $1, $1, 3
; MIPS32EB-NEXT: xori $1, $1, 3
; MIPS32EB-NEXT: sll $3, $1, 3
; MIPS32EB-NEXT: ori $1, $zero, 255
; MIPS32EB-NEXT: sllv $5, $1, $3
; MIPS32EB-NEXT: nor $6, $zero, $5
; MIPS32EB-NEXT: sllv $4, $4, $3
; MIPS32EB-NEXT: $BB11_1: # %entry
; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32EB-NEXT: ll $7, 0($2)
; MIPS32EB-NEXT: and $8, $4, $5
; MIPS32EB-NEXT: and $9, $7, $6
; MIPS32EB-NEXT: or $9, $9, $8
; MIPS32EB-NEXT: sc $9, 0($2)
; MIPS32EB-NEXT: beqz $9, $BB11_1
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: # %bb.2: # %entry
; MIPS32EB-NEXT: and $1, $7, $5
; MIPS32EB-NEXT: srlv $1, $1, $3
; MIPS32EB-NEXT: sll $1, $1, 24
; MIPS32EB-NEXT: sra $1, $1, 24
; MIPS32EB-NEXT: # %bb.3: # %entry
; MIPS32EB-NEXT: sll $1, $1, 24
; MIPS32EB-NEXT: jr $ra
; MIPS32EB-NEXT: sra $2, $1, 24
entry:
%0 = atomicrmw xchg i8* @y, i8 %newval monotonic
ret i8 %0
}
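; AtomicCmpSwap8 adds an early exit to the loop: %oldval and %newval are both
; masked to 255 and shifted into the byte lane, and a bne (bnec on R6) on the
; masked loaded byte jumps past the sc when the comparison fails.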
define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwind {
; MIPS32-LABEL: AtomicCmpSwap8:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-NEXT: addu $1, $2, $25
; MIPS32-NEXT: lw $1, %got(y)($1)
; MIPS32-NEXT: addiu $2, $zero, -4
; MIPS32-NEXT: and $2, $1, $2
; MIPS32-NEXT: andi $1, $1, 3
; MIPS32-NEXT: sll $3, $1, 3
; MIPS32-NEXT: ori $1, $zero, 255
; MIPS32-NEXT: sllv $6, $1, $3
; MIPS32-NEXT: nor $7, $zero, $6
; MIPS32-NEXT: andi $1, $4, 255
; MIPS32-NEXT: sllv $4, $1, $3
; MIPS32-NEXT: andi $1, $5, 255
; MIPS32-NEXT: sllv $5, $1, $3
; MIPS32-NEXT: $BB12_1: # %entry
; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32-NEXT: ll $8, 0($2)
; MIPS32-NEXT: and $9, $8, $6
; MIPS32-NEXT: bne $9, $4, $BB12_3
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %entry
; MIPS32-NEXT: # in Loop: Header=BB12_1 Depth=1
; MIPS32-NEXT: and $8, $8, $7
; MIPS32-NEXT: or $8, $8, $5
; MIPS32-NEXT: sc $8, 0($2)
; MIPS32-NEXT: beqz $8, $BB12_1
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB12_3: # %entry
; MIPS32-NEXT: srlv $1, $9, $3
; MIPS32-NEXT: sll $1, $1, 24
; MIPS32-NEXT: sra $1, $1, 24
; MIPS32-NEXT: # %bb.4: # %entry
; MIPS32-NEXT: sll $1, $1, 24
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: sra $2, $1, 24
;
; MIPS32O0-LABEL: AtomicCmpSwap8:
; MIPS32O0: # %bb.0: # %entry
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addiu $sp, $sp, -8
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: lw $1, %got(y)($1)
; MIPS32O0-NEXT: addiu $2, $zero, -4
; MIPS32O0-NEXT: and $2, $1, $2
; MIPS32O0-NEXT: andi $1, $1, 3
; MIPS32O0-NEXT: sll $1, $1, 3
; MIPS32O0-NEXT: ori $3, $zero, 255
; MIPS32O0-NEXT: sllv $3, $3, $1
; MIPS32O0-NEXT: nor $6, $zero, $3
; MIPS32O0-NEXT: andi $4, $4, 255
; MIPS32O0-NEXT: sllv $4, $4, $1
; MIPS32O0-NEXT: andi $5, $5, 255
; MIPS32O0-NEXT: sllv $5, $5, $1
; MIPS32O0-NEXT: $BB12_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32O0-NEXT: ll $8, 0($2)
; MIPS32O0-NEXT: and $9, $8, $3
; MIPS32O0-NEXT: bne $9, $4, $BB12_3
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: # in Loop: Header=BB12_1 Depth=1
; MIPS32O0-NEXT: and $8, $8, $6
; MIPS32O0-NEXT: or $8, $8, $5
; MIPS32O0-NEXT: sc $8, 0($2)
; MIPS32O0-NEXT: beqz $8, $BB12_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: $BB12_3: # %entry
; MIPS32O0-NEXT: srlv $7, $9, $1
; MIPS32O0-NEXT: sll $7, $7, 24
; MIPS32O0-NEXT: sra $7, $7, 24
; MIPS32O0-NEXT: # %bb.4: # %entry
; MIPS32O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: # %bb.5: # %entry
; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32O0-NEXT: sll $2, $1, 24
; MIPS32O0-NEXT: sra $2, $2, 24
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
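; MIPS32R2 and later have seb, so the sign extension below is a single
; instruction where base MIPS32 needs an sll/sra-by-24 pair.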
; MIPS32R2-LABEL: AtomicCmpSwap8:
; MIPS32R2: # %bb.0: # %entry
; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R2-NEXT: addu $1, $2, $25
; MIPS32R2-NEXT: lw $1, %got(y)($1)
; MIPS32R2-NEXT: addiu $2, $zero, -4
; MIPS32R2-NEXT: and $3, $1, $2
; MIPS32R2-NEXT: andi $1, $1, 3
; MIPS32R2-NEXT: sll $1, $1, 3
; MIPS32R2-NEXT: ori $2, $zero, 255
; MIPS32R2-NEXT: sllv $6, $2, $1
; MIPS32R2-NEXT: nor $7, $zero, $6
; MIPS32R2-NEXT: andi $2, $4, 255
; MIPS32R2-NEXT: sllv $4, $2, $1
; MIPS32R2-NEXT: andi $2, $5, 255
; MIPS32R2-NEXT: sllv $5, $2, $1
; MIPS32R2-NEXT: $BB12_1: # %entry
; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R2-NEXT: ll $8, 0($3)
; MIPS32R2-NEXT: and $9, $8, $6
; MIPS32R2-NEXT: bne $9, $4, $BB12_3
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: # %bb.2: # %entry
; MIPS32R2-NEXT: # in Loop: Header=BB12_1 Depth=1
; MIPS32R2-NEXT: and $8, $8, $7
; MIPS32R2-NEXT: or $8, $8, $5
; MIPS32R2-NEXT: sc $8, 0($3)
; MIPS32R2-NEXT: beqz $8, $BB12_1
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: $BB12_3: # %entry
; MIPS32R2-NEXT: srlv $2, $9, $1
; MIPS32R2-NEXT: seb $2, $2
; MIPS32R2-NEXT: # %bb.4: # %entry
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: nop
;
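; MIPS32R6 replaces the delay-slot branches with compact bnec/beqzc/jrc, so
; the filler nops disappear from the loop below.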
; MIPS32R6-LABEL: AtomicCmpSwap8:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-NEXT: addu $1, $2, $25
; MIPS32R6-NEXT: lw $1, %got(y)($1)
; MIPS32R6-NEXT: addiu $2, $zero, -4
; MIPS32R6-NEXT: and $3, $1, $2
; MIPS32R6-NEXT: andi $1, $1, 3
; MIPS32R6-NEXT: sll $1, $1, 3
; MIPS32R6-NEXT: ori $2, $zero, 255
; MIPS32R6-NEXT: sllv $6, $2, $1
; MIPS32R6-NEXT: nor $7, $zero, $6
; MIPS32R6-NEXT: andi $2, $4, 255
; MIPS32R6-NEXT: sllv $4, $2, $1
; MIPS32R6-NEXT: andi $2, $5, 255
; MIPS32R6-NEXT: sllv $5, $2, $1
; MIPS32R6-NEXT: $BB12_1: # %entry
; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6-NEXT: ll $8, 0($3)
; MIPS32R6-NEXT: and $9, $8, $6
; MIPS32R6-NEXT: bnec $9, $4, $BB12_3
; MIPS32R6-NEXT: # %bb.2: # %entry
; MIPS32R6-NEXT: # in Loop: Header=BB12_1 Depth=1
; MIPS32R6-NEXT: and $8, $8, $7
; MIPS32R6-NEXT: or $8, $8, $5
; MIPS32R6-NEXT: sc $8, 0($3)
; MIPS32R6-NEXT: beqzc $8, $BB12_1
; MIPS32R6-NEXT: $BB12_3: # %entry
; MIPS32R6-NEXT: srlv $2, $9, $1
; MIPS32R6-NEXT: seb $2, $2
; MIPS32R6-NEXT: # %bb.4: # %entry
; MIPS32R6-NEXT: jrc $ra
;
; MIPS32R6O0-LABEL: AtomicCmpSwap8:
; MIPS32R6O0: # %bb.0: # %entry
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: move $2, $5
; MIPS32R6O0-NEXT: move $3, $4
; MIPS32R6O0-NEXT: lw $1, %got(y)($1)
; MIPS32R6O0-NEXT: addiu $6, $zero, -4
; MIPS32R6O0-NEXT: and $6, $1, $6
; MIPS32R6O0-NEXT: andi $1, $1, 3
; MIPS32R6O0-NEXT: sll $1, $1, 3
; MIPS32R6O0-NEXT: ori $7, $zero, 255
; MIPS32R6O0-NEXT: sllv $7, $7, $1
; MIPS32R6O0-NEXT: nor $8, $zero, $7
; MIPS32R6O0-NEXT: andi $4, $4, 255
; MIPS32R6O0-NEXT: sllv $4, $4, $1
; MIPS32R6O0-NEXT: andi $5, $5, 255
; MIPS32R6O0-NEXT: sllv $5, $5, $1
; MIPS32R6O0-NEXT: $BB12_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6O0-NEXT: ll $10, 0($6)
; MIPS32R6O0-NEXT: and $11, $10, $7
; MIPS32R6O0-NEXT: bnec $11, $4, $BB12_3
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1
; MIPS32R6O0-NEXT: and $10, $10, $8
; MIPS32R6O0-NEXT: or $10, $10, $5
; MIPS32R6O0-NEXT: sc $10, 0($6)
; MIPS32R6O0-NEXT: beqzc $10, $BB12_1
; MIPS32R6O0-NEXT: $BB12_3: # %entry
; MIPS32R6O0-NEXT: srlv $9, $11, $1
; MIPS32R6O0-NEXT: seb $9, $9
; MIPS32R6O0-NEXT: # %bb.4: # %entry
; MIPS32R6O0-NEXT: sw $9, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.5: # %entry
; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: AtomicCmpSwap8:
; MIPS4: # %bb.0: # %entry
; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8)))
; MIPS4-NEXT: daddu $1, $1, $25
; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8)))
; MIPS4-NEXT: ld $1, %got_disp(y)($1)
; MIPS4-NEXT: daddiu $2, $zero, -4
; MIPS4-NEXT: and $2, $1, $2
; MIPS4-NEXT: andi $1, $1, 3
; MIPS4-NEXT: sll $3, $1, 3
; MIPS4-NEXT: ori $1, $zero, 255
; MIPS4-NEXT: sllv $6, $1, $3
; MIPS4-NEXT: nor $7, $zero, $6
; MIPS4-NEXT: andi $1, $4, 255
; MIPS4-NEXT: sllv $4, $1, $3
; MIPS4-NEXT: andi $1, $5, 255
; MIPS4-NEXT: sllv $5, $1, $3
; MIPS4-NEXT: .LBB12_1: # %entry
; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS4-NEXT: ll $8, 0($2)
; MIPS4-NEXT: and $9, $8, $6
; MIPS4-NEXT: bne $9, $4, .LBB12_3
; MIPS4-NEXT: nop
; MIPS4-NEXT: # %bb.2: # %entry
; MIPS4-NEXT: # in Loop: Header=BB12_1 Depth=1
; MIPS4-NEXT: and $8, $8, $7
; MIPS4-NEXT: or $8, $8, $5
; MIPS4-NEXT: sc $8, 0($2)
; MIPS4-NEXT: beqz $8, .LBB12_1
; MIPS4-NEXT: nop
; MIPS4-NEXT: .LBB12_3: # %entry
; MIPS4-NEXT: srlv $1, $9, $3
; MIPS4-NEXT: sll $1, $1, 24
; MIPS4-NEXT: sra $1, $1, 24
; MIPS4-NEXT: # %bb.4: # %entry
; MIPS4-NEXT: sll $1, $1, 24
; MIPS4-NEXT: jr $ra
; MIPS4-NEXT: sra $2, $1, 24
;
; MIPS64-LABEL: AtomicCmpSwap8:
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8)))
; MIPS64-NEXT: daddu $1, $1, $25
; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8)))
; MIPS64-NEXT: ld $1, %got_disp(y)($1)
; MIPS64-NEXT: daddiu $2, $zero, -4
; MIPS64-NEXT: and $2, $1, $2
; MIPS64-NEXT: andi $1, $1, 3
; MIPS64-NEXT: sll $3, $1, 3
; MIPS64-NEXT: ori $1, $zero, 255
; MIPS64-NEXT: sllv $6, $1, $3
; MIPS64-NEXT: nor $7, $zero, $6
; MIPS64-NEXT: andi $1, $4, 255
; MIPS64-NEXT: sllv $4, $1, $3
; MIPS64-NEXT: andi $1, $5, 255
; MIPS64-NEXT: sllv $5, $1, $3
; MIPS64-NEXT: .LBB12_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64-NEXT: ll $8, 0($2)
; MIPS64-NEXT: and $9, $8, $6
; MIPS64-NEXT: bne $9, $4, .LBB12_3
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
; MIPS64-NEXT: # in Loop: Header=BB12_1 Depth=1
; MIPS64-NEXT: and $8, $8, $7
; MIPS64-NEXT: or $8, $8, $5
; MIPS64-NEXT: sc $8, 0($2)
; MIPS64-NEXT: beqz $8, .LBB12_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: .LBB12_3: # %entry
; MIPS64-NEXT: srlv $1, $9, $3
; MIPS64-NEXT: sll $1, $1, 24
; MIPS64-NEXT: sra $1, $1, 24
; MIPS64-NEXT: # %bb.4: # %entry
; MIPS64-NEXT: sll $1, $1, 24
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: sra $2, $1, 24
;
; MIPS64R2-LABEL: AtomicCmpSwap8:
; MIPS64R2: # %bb.0: # %entry
; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8)))
; MIPS64R2-NEXT: daddu $1, $1, $25
; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8)))
; MIPS64R2-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R2-NEXT: daddiu $2, $zero, -4
; MIPS64R2-NEXT: and $3, $1, $2
; MIPS64R2-NEXT: andi $1, $1, 3
; MIPS64R2-NEXT: sll $1, $1, 3
; MIPS64R2-NEXT: ori $2, $zero, 255
; MIPS64R2-NEXT: sllv $6, $2, $1
; MIPS64R2-NEXT: nor $7, $zero, $6
; MIPS64R2-NEXT: andi $2, $4, 255
; MIPS64R2-NEXT: sllv $4, $2, $1
; MIPS64R2-NEXT: andi $2, $5, 255
; MIPS64R2-NEXT: sllv $5, $2, $1
; MIPS64R2-NEXT: .LBB12_1: # %entry
; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R2-NEXT: ll $8, 0($3)
; MIPS64R2-NEXT: and $9, $8, $6
; MIPS64R2-NEXT: bne $9, $4, .LBB12_3
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: # %bb.2: # %entry
; MIPS64R2-NEXT: # in Loop: Header=BB12_1 Depth=1
; MIPS64R2-NEXT: and $8, $8, $7
; MIPS64R2-NEXT: or $8, $8, $5
; MIPS64R2-NEXT: sc $8, 0($3)
; MIPS64R2-NEXT: beqz $8, .LBB12_1
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: .LBB12_3: # %entry
; MIPS64R2-NEXT: srlv $2, $9, $1
; MIPS64R2-NEXT: seb $2, $2
; MIPS64R2-NEXT: # %bb.4: # %entry
; MIPS64R2-NEXT: jr $ra
; MIPS64R2-NEXT: nop
;
; MIPS64R6-LABEL: AtomicCmpSwap8:
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8)))
; MIPS64R6-NEXT: daddu $1, $1, $25
; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8)))
; MIPS64R6-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R6-NEXT: daddiu $2, $zero, -4
; MIPS64R6-NEXT: and $3, $1, $2
; MIPS64R6-NEXT: andi $1, $1, 3
; MIPS64R6-NEXT: sll $1, $1, 3
; MIPS64R6-NEXT: ori $2, $zero, 255
; MIPS64R6-NEXT: sllv $6, $2, $1
; MIPS64R6-NEXT: nor $7, $zero, $6
; MIPS64R6-NEXT: andi $2, $4, 255
; MIPS64R6-NEXT: sllv $4, $2, $1
; MIPS64R6-NEXT: andi $2, $5, 255
; MIPS64R6-NEXT: sllv $5, $2, $1
; MIPS64R6-NEXT: .LBB12_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6-NEXT: ll $8, 0($3)
; MIPS64R6-NEXT: and $9, $8, $6
; MIPS64R6-NEXT: bnec $9, $4, .LBB12_3
; MIPS64R6-NEXT: # %bb.2: # %entry
; MIPS64R6-NEXT: # in Loop: Header=BB12_1 Depth=1
; MIPS64R6-NEXT: and $8, $8, $7
; MIPS64R6-NEXT: or $8, $8, $5
; MIPS64R6-NEXT: sc $8, 0($3)
; MIPS64R6-NEXT: beqzc $8, .LBB12_1
; MIPS64R6-NEXT: .LBB12_3: # %entry
; MIPS64R6-NEXT: srlv $2, $9, $1
; MIPS64R6-NEXT: seb $2, $2
; MIPS64R6-NEXT: # %bb.4: # %entry
; MIPS64R6-NEXT: jrc $ra
;
; MIPS64R6O0-LABEL: AtomicCmpSwap8:
; MIPS64R6O0: # %bb.0: # %entry
; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8)))
; MIPS64R6O0-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1)
; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
; MIPS64R6O0-NEXT: and $2, $1, $2
; MIPS64R6O0-NEXT: andi $3, $1, 3
; MIPS64R6O0-NEXT: xori $3, $3, 3
; MIPS64R6O0-NEXT: sll $3, $3, 3
; MIPS64R6O0-NEXT: ori $6, $zero, 255
; MIPS64R6O0-NEXT: sllv $6, $6, $3
; MIPS64R6O0-NEXT: nor $7, $zero, $6
; MIPS64R6O0-NEXT: andi $4, $4, 255
; MIPS64R6O0-NEXT: sllv $4, $4, $3
; MIPS64R6O0-NEXT: andi $5, $5, 255
; MIPS64R6O0-NEXT: sllv $5, $5, $3
; MIPS64R6O0-NEXT: .LBB12_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6O0-NEXT: ll $9, 0($2)
; MIPS64R6O0-NEXT: and $10, $9, $6
; MIPS64R6O0-NEXT: bnec $10, $4, .LBB12_3
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1
; MIPS64R6O0-NEXT: and $9, $9, $7
; MIPS64R6O0-NEXT: or $9, $9, $5
; MIPS64R6O0-NEXT: sc $9, 0($2)
; MIPS64R6O0-NEXT: beqzc $9, .LBB12_1
; MIPS64R6O0-NEXT: .LBB12_3: # %entry
; MIPS64R6O0-NEXT: srlv $8, $10, $3
; MIPS64R6O0-NEXT: seb $8, $8
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: sw $8, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.5: # %entry
; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: AtomicCmpSwap8:
; MM32: # %bb.0: # %entry
; MM32-NEXT: lui $2, %hi(_gp_disp)
; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MM32-NEXT: addu $2, $2, $25
; MM32-NEXT: lw $1, %got(y)($2)
; MM32-NEXT: addiu $2, $zero, -4
; MM32-NEXT: and $3, $1, $2
; MM32-NEXT: andi $1, $1, 3
; MM32-NEXT: sll $1, $1, 3
; MM32-NEXT: ori $2, $zero, 255
; MM32-NEXT: sllv $6, $2, $1
; MM32-NEXT: nor $7, $zero, $6
; MM32-NEXT: andi $2, $4, 255
; MM32-NEXT: sllv $4, $2, $1
; MM32-NEXT: andi $2, $5, 255
; MM32-NEXT: sllv $5, $2, $1
; MM32-NEXT: $BB12_1: # %entry
; MM32-NEXT: # =>This Inner Loop Header: Depth=1
; MM32-NEXT: ll $8, 0($3)
; MM32-NEXT: and $9, $8, $6
; MM32-NEXT: bne $9, $4, $BB12_3
; MM32-NEXT: nop
; MM32-NEXT: # %bb.2: # %entry
; MM32-NEXT: # in Loop: Header=BB12_1 Depth=1
; MM32-NEXT: and $8, $8, $7
; MM32-NEXT: or $8, $8, $5
; MM32-NEXT: sc $8, 0($3)
; MM32-NEXT: beqzc $8, $BB12_1
; MM32-NEXT: $BB12_3: # %entry
; MM32-NEXT: srlv $2, $9, $1
; MM32-NEXT: seb $2, $2
; MM32-NEXT: # %bb.4: # %entry
; MM32-NEXT: jrc $ra
;
; O1-LABEL: AtomicCmpSwap8:
; O1: # %bb.0: # %entry
; O1-NEXT: lui $2, %hi(_gp_disp)
; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
; O1-NEXT: addu $1, $2, $25
; O1-NEXT: lw $1, %got(y)($1)
; O1-NEXT: addiu $2, $zero, -4
; O1-NEXT: and $2, $1, $2
; O1-NEXT: andi $1, $1, 3
; O1-NEXT: sll $3, $1, 3
; O1-NEXT: ori $1, $zero, 255
; O1-NEXT: sllv $6, $1, $3
; O1-NEXT: nor $7, $zero, $6
; O1-NEXT: andi $1, $4, 255
; O1-NEXT: sllv $4, $1, $3
; O1-NEXT: andi $1, $5, 255
; O1-NEXT: sllv $5, $1, $3
; O1-NEXT: $BB12_1: # %entry
; O1-NEXT: # =>This Inner Loop Header: Depth=1
; O1-NEXT: ll $8, 0($2)
; O1-NEXT: and $9, $8, $6
; O1-NEXT: bne $9, $4, $BB12_3
; O1-NEXT: nop
; O1-NEXT: # %bb.2: # %entry
; O1-NEXT: # in Loop: Header=BB12_1 Depth=1
; O1-NEXT: and $8, $8, $7
; O1-NEXT: or $8, $8, $5
; O1-NEXT: sc $8, 0($2)
; O1-NEXT: beqz $8, $BB12_1
; O1-NEXT: nop
; O1-NEXT: $BB12_3: # %entry
; O1-NEXT: srlv $1, $9, $3
; O1-NEXT: sll $1, $1, 24
; O1-NEXT: sra $1, $1, 24
; O1-NEXT: # %bb.4: # %entry
; O1-NEXT: sll $1, $1, 24
; O1-NEXT: jr $ra
; O1-NEXT: sra $2, $1, 24
;
; O2-LABEL: AtomicCmpSwap8:
; O2: # %bb.0: # %entry
; O2-NEXT: lui $2, %hi(_gp_disp)
; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
; O2-NEXT: addu $1, $2, $25
; O2-NEXT: lw $1, %got(y)($1)
; O2-NEXT: addiu $2, $zero, -4
; O2-NEXT: and $2, $1, $2
; O2-NEXT: andi $1, $1, 3
; O2-NEXT: sll $3, $1, 3
; O2-NEXT: ori $1, $zero, 255
; O2-NEXT: sllv $6, $1, $3
; O2-NEXT: nor $7, $zero, $6
; O2-NEXT: andi $1, $4, 255
; O2-NEXT: sllv $4, $1, $3
; O2-NEXT: andi $1, $5, 255
; O2-NEXT: sllv $5, $1, $3
; O2-NEXT: $BB12_1: # %entry
; O2-NEXT: # =>This Inner Loop Header: Depth=1
; O2-NEXT: ll $8, 0($2)
; O2-NEXT: and $9, $8, $6
; O2-NEXT: bne $9, $4, $BB12_3
; O2-NEXT: nop
; O2-NEXT: # %bb.2: # %entry
; O2-NEXT: # in Loop: Header=BB12_1 Depth=1
; O2-NEXT: and $8, $8, $7
; O2-NEXT: or $8, $8, $5
; O2-NEXT: sc $8, 0($2)
; O2-NEXT: beqz $8, $BB12_1
; O2-NEXT: nop
; O2-NEXT: $BB12_3: # %entry
; O2-NEXT: srlv $1, $9, $3
; O2-NEXT: sll $1, $1, 24
; O2-NEXT: sra $1, $1, 24
; O2-NEXT: # %bb.4: # %entry
; O2-NEXT: sll $1, $1, 24
; O2-NEXT: jr $ra
; O2-NEXT: sra $2, $1, 24
;
; O3-LABEL: AtomicCmpSwap8:
; O3: # %bb.0: # %entry
; O3-NEXT: lui $2, %hi(_gp_disp)
; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
; O3-NEXT: addu $1, $2, $25
; O3-NEXT: addiu $2, $zero, -4
; O3-NEXT: lw $1, %got(y)($1)
; O3-NEXT: and $2, $1, $2
; O3-NEXT: andi $1, $1, 3
; O3-NEXT: sll $3, $1, 3
; O3-NEXT: ori $1, $zero, 255
; O3-NEXT: sllv $6, $1, $3
; O3-NEXT: andi $1, $4, 255
; O3-NEXT: sllv $4, $1, $3
; O3-NEXT: andi $1, $5, 255
; O3-NEXT: nor $7, $zero, $6
; O3-NEXT: sllv $5, $1, $3
; O3-NEXT: $BB12_1: # %entry
; O3-NEXT: # =>This Inner Loop Header: Depth=1
; O3-NEXT: ll $8, 0($2)
; O3-NEXT: and $9, $8, $6
; O3-NEXT: bne $9, $4, $BB12_3
; O3-NEXT: nop
; O3-NEXT: # %bb.2: # %entry
; O3-NEXT: # in Loop: Header=BB12_1 Depth=1
; O3-NEXT: and $8, $8, $7
; O3-NEXT: or $8, $8, $5
; O3-NEXT: sc $8, 0($2)
; O3-NEXT: beqz $8, $BB12_1
; O3-NEXT: nop
; O3-NEXT: $BB12_3: # %entry
; O3-NEXT: srlv $1, $9, $3
; O3-NEXT: sll $1, $1, 24
; O3-NEXT: sra $1, $1, 24
; O3-NEXT: # %bb.4: # %entry
; O3-NEXT: sll $1, $1, 24
; O3-NEXT: jr $ra
; O3-NEXT: sra $2, $1, 24
;
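; On the big-endian MIPS32EB target the byte index is flipped with
; xori $1, $1, 3 so the shifted mask lands on the correct lane of the word.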
; MIPS32EB-LABEL: AtomicCmpSwap8:
; MIPS32EB: # %bb.0: # %entry
; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32EB-NEXT: addu $1, $2, $25
; MIPS32EB-NEXT: lw $1, %got(y)($1)
; MIPS32EB-NEXT: addiu $2, $zero, -4
; MIPS32EB-NEXT: and $2, $1, $2
; MIPS32EB-NEXT: andi $1, $1, 3
; MIPS32EB-NEXT: xori $1, $1, 3
; MIPS32EB-NEXT: sll $3, $1, 3
; MIPS32EB-NEXT: ori $1, $zero, 255
; MIPS32EB-NEXT: sllv $6, $1, $3
; MIPS32EB-NEXT: nor $7, $zero, $6
; MIPS32EB-NEXT: andi $1, $4, 255
; MIPS32EB-NEXT: sllv $4, $1, $3
; MIPS32EB-NEXT: andi $1, $5, 255
; MIPS32EB-NEXT: sllv $5, $1, $3
; MIPS32EB-NEXT: $BB12_1: # %entry
; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32EB-NEXT: ll $8, 0($2)
; MIPS32EB-NEXT: and $9, $8, $6
; MIPS32EB-NEXT: bne $9, $4, $BB12_3
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: # %bb.2: # %entry
; MIPS32EB-NEXT: # in Loop: Header=BB12_1 Depth=1
; MIPS32EB-NEXT: and $8, $8, $7
; MIPS32EB-NEXT: or $8, $8, $5
; MIPS32EB-NEXT: sc $8, 0($2)
; MIPS32EB-NEXT: beqz $8, $BB12_1
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: $BB12_3: # %entry
; MIPS32EB-NEXT: srlv $1, $9, $3
; MIPS32EB-NEXT: sll $1, $1, 24
; MIPS32EB-NEXT: sra $1, $1, 24
; MIPS32EB-NEXT: # %bb.4: # %entry
; MIPS32EB-NEXT: sll $1, $1, 24
; MIPS32EB-NEXT: jr $ra
; MIPS32EB-NEXT: sra $2, $1, 24
entry:
%pair0 = cmpxchg i8* @y, i8 %oldval, i8 %newval monotonic monotonic
%0 = extractvalue { i8, i1 } %pair0, 0
ret i8 %0
}
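
; AtomicCmpSwapRes8 returns only the i1 success flag of the cmpxchg: after
; the same masked ll/sc loop, the extracted byte is compared against the
; sign-extended %oldval (xor, then sltiu $2, $1, 1) to produce the boolean.
; A rough C analogue (an illustrative sketch, not part of the test):
;   return __atomic_compare_exchange_n(ptr, &oldval, newval, 0,
;                                      __ATOMIC_RELAXED, __ATOMIC_RELAXED);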
define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) nounwind {
; MIPS32-LABEL: AtomicCmpSwapRes8:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: addiu $1, $zero, -4
; MIPS32-NEXT: and $2, $4, $1
; MIPS32-NEXT: andi $1, $4, 3
; MIPS32-NEXT: sll $3, $1, 3
; MIPS32-NEXT: ori $1, $zero, 255
; MIPS32-NEXT: sllv $4, $1, $3
; MIPS32-NEXT: nor $7, $zero, $4
; MIPS32-NEXT: andi $1, $5, 255
; MIPS32-NEXT: sllv $8, $1, $3
; MIPS32-NEXT: andi $1, $6, 255
; MIPS32-NEXT: sllv $6, $1, $3
; MIPS32-NEXT: $BB13_1: # %entry
; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32-NEXT: ll $9, 0($2)
; MIPS32-NEXT: and $10, $9, $4
; MIPS32-NEXT: bne $10, $8, $BB13_3
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %entry
; MIPS32-NEXT: # in Loop: Header=BB13_1 Depth=1
; MIPS32-NEXT: and $9, $9, $7
; MIPS32-NEXT: or $9, $9, $6
; MIPS32-NEXT: sc $9, 0($2)
; MIPS32-NEXT: beqz $9, $BB13_1
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB13_3: # %entry
; MIPS32-NEXT: srlv $1, $10, $3
; MIPS32-NEXT: sll $1, $1, 24
; MIPS32-NEXT: sra $1, $1, 24
; MIPS32-NEXT: # %bb.4: # %entry
; MIPS32-NEXT: sll $2, $5, 24
; MIPS32-NEXT: sra $2, $2, 24
; MIPS32-NEXT: xor $1, $1, $2
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: sltiu $2, $1, 1
;
; MIPS32O0-LABEL: AtomicCmpSwapRes8:
; MIPS32O0: # %bb.0: # %entry
; MIPS32O0-NEXT: addiu $sp, $sp, -8
; MIPS32O0-NEXT: addiu $1, $zero, -4
; MIPS32O0-NEXT: and $1, $4, $1
; MIPS32O0-NEXT: andi $2, $4, 3
; MIPS32O0-NEXT: sll $2, $2, 3
; MIPS32O0-NEXT: ori $3, $zero, 255
; MIPS32O0-NEXT: sllv $3, $3, $2
; MIPS32O0-NEXT: nor $4, $zero, $3
; MIPS32O0-NEXT: andi $7, $5, 255
; MIPS32O0-NEXT: sllv $7, $7, $2
; MIPS32O0-NEXT: andi $6, $6, 255
; MIPS32O0-NEXT: sllv $6, $6, $2
; MIPS32O0-NEXT: $BB13_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32O0-NEXT: ll $9, 0($1)
; MIPS32O0-NEXT: and $10, $9, $3
; MIPS32O0-NEXT: bne $10, $7, $BB13_3
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: # in Loop: Header=BB13_1 Depth=1
; MIPS32O0-NEXT: and $9, $9, $4
; MIPS32O0-NEXT: or $9, $9, $6
; MIPS32O0-NEXT: sc $9, 0($1)
; MIPS32O0-NEXT: beqz $9, $BB13_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: $BB13_3: # %entry
; MIPS32O0-NEXT: srlv $8, $10, $2
; MIPS32O0-NEXT: sll $8, $8, 24
; MIPS32O0-NEXT: sra $8, $8, 24
; MIPS32O0-NEXT: # %bb.4: # %entry
; MIPS32O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: sw $8, 0($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: # %bb.5: # %entry
; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32O0-NEXT: sll $2, $1, 24
; MIPS32O0-NEXT: sra $2, $2, 24
; MIPS32O0-NEXT: lw $3, 0($sp) # 4-byte Folded Reload
; MIPS32O0-NEXT: xor $2, $3, $2
; MIPS32O0-NEXT: sltiu $2, $2, 1
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
; MIPS32R2-LABEL: AtomicCmpSwapRes8:
; MIPS32R2: # %bb.0: # %entry
; MIPS32R2-NEXT: addiu $1, $zero, -4
; MIPS32R2-NEXT: and $2, $4, $1
; MIPS32R2-NEXT: andi $1, $4, 3
; MIPS32R2-NEXT: sll $3, $1, 3
; MIPS32R2-NEXT: ori $1, $zero, 255
; MIPS32R2-NEXT: sllv $4, $1, $3
; MIPS32R2-NEXT: nor $7, $zero, $4
; MIPS32R2-NEXT: andi $1, $5, 255
; MIPS32R2-NEXT: sllv $8, $1, $3
; MIPS32R2-NEXT: andi $1, $6, 255
; MIPS32R2-NEXT: sllv $6, $1, $3
; MIPS32R2-NEXT: $BB13_1: # %entry
; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R2-NEXT: ll $9, 0($2)
; MIPS32R2-NEXT: and $10, $9, $4
; MIPS32R2-NEXT: bne $10, $8, $BB13_3
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: # %bb.2: # %entry
; MIPS32R2-NEXT: # in Loop: Header=BB13_1 Depth=1
; MIPS32R2-NEXT: and $9, $9, $7
; MIPS32R2-NEXT: or $9, $9, $6
; MIPS32R2-NEXT: sc $9, 0($2)
; MIPS32R2-NEXT: beqz $9, $BB13_1
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: $BB13_3: # %entry
; MIPS32R2-NEXT: srlv $1, $10, $3
; MIPS32R2-NEXT: seb $1, $1
; MIPS32R2-NEXT: # %bb.4: # %entry
; MIPS32R2-NEXT: xor $1, $1, $5
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: sltiu $2, $1, 1
;
; MIPS32R6-LABEL: AtomicCmpSwapRes8:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: addiu $1, $zero, -4
; MIPS32R6-NEXT: and $2, $4, $1
; MIPS32R6-NEXT: andi $1, $4, 3
; MIPS32R6-NEXT: sll $3, $1, 3
; MIPS32R6-NEXT: ori $1, $zero, 255
; MIPS32R6-NEXT: sllv $4, $1, $3
; MIPS32R6-NEXT: nor $7, $zero, $4
; MIPS32R6-NEXT: andi $1, $5, 255
; MIPS32R6-NEXT: sllv $8, $1, $3
; MIPS32R6-NEXT: andi $1, $6, 255
; MIPS32R6-NEXT: sllv $6, $1, $3
; MIPS32R6-NEXT: $BB13_1: # %entry
; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6-NEXT: ll $9, 0($2)
; MIPS32R6-NEXT: and $10, $9, $4
; MIPS32R6-NEXT: bnec $10, $8, $BB13_3
; MIPS32R6-NEXT: # %bb.2: # %entry
; MIPS32R6-NEXT: # in Loop: Header=BB13_1 Depth=1
; MIPS32R6-NEXT: and $9, $9, $7
; MIPS32R6-NEXT: or $9, $9, $6
; MIPS32R6-NEXT: sc $9, 0($2)
; MIPS32R6-NEXT: beqzc $9, $BB13_1
; MIPS32R6-NEXT: $BB13_3: # %entry
; MIPS32R6-NEXT: srlv $1, $10, $3
; MIPS32R6-NEXT: seb $1, $1
; MIPS32R6-NEXT: # %bb.4: # %entry
; MIPS32R6-NEXT: xor $1, $1, $5
; MIPS32R6-NEXT: jr $ra
; MIPS32R6-NEXT: sltiu $2, $1, 1
;
; MIPS32R6O0-LABEL: AtomicCmpSwapRes8:
; MIPS32R6O0: # %bb.0: # %entry
; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: move $1, $6
; MIPS32R6O0-NEXT: move $2, $5
; MIPS32R6O0-NEXT: addiu $3, $zero, -4
; MIPS32R6O0-NEXT: and $3, $4, $3
; MIPS32R6O0-NEXT: andi $4, $4, 3
; MIPS32R6O0-NEXT: sll $4, $4, 3
; MIPS32R6O0-NEXT: ori $7, $zero, 255
; MIPS32R6O0-NEXT: sllv $7, $7, $4
; MIPS32R6O0-NEXT: nor $8, $zero, $7
; MIPS32R6O0-NEXT: andi $9, $5, 255
; MIPS32R6O0-NEXT: sllv $9, $9, $4
; MIPS32R6O0-NEXT: andi $6, $6, 255
; MIPS32R6O0-NEXT: sllv $6, $6, $4
; MIPS32R6O0-NEXT: $BB13_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6O0-NEXT: ll $11, 0($3)
; MIPS32R6O0-NEXT: and $12, $11, $7
; MIPS32R6O0-NEXT: bnec $12, $9, $BB13_3
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1
; MIPS32R6O0-NEXT: and $11, $11, $8
; MIPS32R6O0-NEXT: or $11, $11, $6
; MIPS32R6O0-NEXT: sc $11, 0($3)
; MIPS32R6O0-NEXT: beqzc $11, $BB13_1
; MIPS32R6O0-NEXT: $BB13_3: # %entry
; MIPS32R6O0-NEXT: srlv $10, $12, $4
; MIPS32R6O0-NEXT: seb $10, $10
; MIPS32R6O0-NEXT: # %bb.4: # %entry
; MIPS32R6O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: sw $10, 0($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.5: # %entry
; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: xor $1, $1, $2
; MIPS32R6O0-NEXT: sltiu $2, $1, 1
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: AtomicCmpSwapRes8:
; MIPS4: # %bb.0: # %entry
; MIPS4-NEXT: daddiu $1, $zero, -4
; MIPS4-NEXT: and $2, $4, $1
; MIPS4-NEXT: andi $1, $4, 3
; MIPS4-NEXT: sll $3, $1, 3
; MIPS4-NEXT: ori $1, $zero, 255
; MIPS4-NEXT: sllv $4, $1, $3
; MIPS4-NEXT: nor $7, $zero, $4
; MIPS4-NEXT: andi $1, $5, 255
; MIPS4-NEXT: sllv $8, $1, $3
; MIPS4-NEXT: andi $1, $6, 255
; MIPS4-NEXT: sllv $6, $1, $3
; MIPS4-NEXT: .LBB13_1: # %entry
; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS4-NEXT: ll $9, 0($2)
; MIPS4-NEXT: and $10, $9, $4
; MIPS4-NEXT: bne $10, $8, .LBB13_3
; MIPS4-NEXT: nop
; MIPS4-NEXT: # %bb.2: # %entry
; MIPS4-NEXT: # in Loop: Header=BB13_1 Depth=1
; MIPS4-NEXT: and $9, $9, $7
; MIPS4-NEXT: or $9, $9, $6
; MIPS4-NEXT: sc $9, 0($2)
; MIPS4-NEXT: beqz $9, .LBB13_1
; MIPS4-NEXT: nop
; MIPS4-NEXT: .LBB13_3: # %entry
; MIPS4-NEXT: srlv $1, $10, $3
; MIPS4-NEXT: sll $1, $1, 24
; MIPS4-NEXT: sra $1, $1, 24
; MIPS4-NEXT: # %bb.4: # %entry
; MIPS4-NEXT: sll $2, $5, 24
; MIPS4-NEXT: sra $2, $2, 24
; MIPS4-NEXT: xor $1, $1, $2
; MIPS4-NEXT: jr $ra
; MIPS4-NEXT: sltiu $2, $1, 1
;
; MIPS64-LABEL: AtomicCmpSwapRes8:
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: daddiu $1, $zero, -4
; MIPS64-NEXT: and $2, $4, $1
; MIPS64-NEXT: andi $1, $4, 3
; MIPS64-NEXT: sll $3, $1, 3
; MIPS64-NEXT: ori $1, $zero, 255
; MIPS64-NEXT: sllv $4, $1, $3
; MIPS64-NEXT: nor $7, $zero, $4
; MIPS64-NEXT: andi $1, $5, 255
; MIPS64-NEXT: sllv $8, $1, $3
; MIPS64-NEXT: andi $1, $6, 255
; MIPS64-NEXT: sllv $6, $1, $3
; MIPS64-NEXT: .LBB13_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64-NEXT: ll $9, 0($2)
; MIPS64-NEXT: and $10, $9, $4
; MIPS64-NEXT: bne $10, $8, .LBB13_3
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
; MIPS64-NEXT: # in Loop: Header=BB13_1 Depth=1
; MIPS64-NEXT: and $9, $9, $7
; MIPS64-NEXT: or $9, $9, $6
; MIPS64-NEXT: sc $9, 0($2)
; MIPS64-NEXT: beqz $9, .LBB13_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: .LBB13_3: # %entry
; MIPS64-NEXT: srlv $1, $10, $3
; MIPS64-NEXT: sll $1, $1, 24
; MIPS64-NEXT: sra $1, $1, 24
; MIPS64-NEXT: # %bb.4: # %entry
; MIPS64-NEXT: sll $2, $5, 24
; MIPS64-NEXT: sra $2, $2, 24
; MIPS64-NEXT: xor $1, $1, $2
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: sltiu $2, $1, 1
;
; MIPS64R2-LABEL: AtomicCmpSwapRes8:
; MIPS64R2: # %bb.0: # %entry
; MIPS64R2-NEXT: daddiu $1, $zero, -4
; MIPS64R2-NEXT: and $2, $4, $1
; MIPS64R2-NEXT: andi $1, $4, 3
; MIPS64R2-NEXT: sll $3, $1, 3
; MIPS64R2-NEXT: ori $1, $zero, 255
; MIPS64R2-NEXT: sllv $4, $1, $3
; MIPS64R2-NEXT: nor $7, $zero, $4
; MIPS64R2-NEXT: andi $1, $5, 255
; MIPS64R2-NEXT: sllv $8, $1, $3
; MIPS64R2-NEXT: andi $1, $6, 255
; MIPS64R2-NEXT: sllv $6, $1, $3
; MIPS64R2-NEXT: .LBB13_1: # %entry
; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R2-NEXT: ll $9, 0($2)
; MIPS64R2-NEXT: and $10, $9, $4
; MIPS64R2-NEXT: bne $10, $8, .LBB13_3
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: # %bb.2: # %entry
; MIPS64R2-NEXT: # in Loop: Header=BB13_1 Depth=1
; MIPS64R2-NEXT: and $9, $9, $7
; MIPS64R2-NEXT: or $9, $9, $6
; MIPS64R2-NEXT: sc $9, 0($2)
; MIPS64R2-NEXT: beqz $9, .LBB13_1
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: .LBB13_3: # %entry
; MIPS64R2-NEXT: srlv $1, $10, $3
; MIPS64R2-NEXT: seb $1, $1
; MIPS64R2-NEXT: # %bb.4: # %entry
; MIPS64R2-NEXT: xor $1, $1, $5
; MIPS64R2-NEXT: jr $ra
; MIPS64R2-NEXT: sltiu $2, $1, 1
;
; MIPS64R6-LABEL: AtomicCmpSwapRes8:
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: daddiu $1, $zero, -4
; MIPS64R6-NEXT: and $2, $4, $1
; MIPS64R6-NEXT: andi $1, $4, 3
; MIPS64R6-NEXT: sll $3, $1, 3
; MIPS64R6-NEXT: ori $1, $zero, 255
; MIPS64R6-NEXT: sllv $4, $1, $3
; MIPS64R6-NEXT: nor $7, $zero, $4
; MIPS64R6-NEXT: andi $1, $5, 255
; MIPS64R6-NEXT: sllv $8, $1, $3
; MIPS64R6-NEXT: andi $1, $6, 255
; MIPS64R6-NEXT: sllv $6, $1, $3
; MIPS64R6-NEXT: .LBB13_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6-NEXT: ll $9, 0($2)
; MIPS64R6-NEXT: and $10, $9, $4
; MIPS64R6-NEXT: bnec $10, $8, .LBB13_3
; MIPS64R6-NEXT: # %bb.2: # %entry
; MIPS64R6-NEXT: # in Loop: Header=BB13_1 Depth=1
; MIPS64R6-NEXT: and $9, $9, $7
; MIPS64R6-NEXT: or $9, $9, $6
; MIPS64R6-NEXT: sc $9, 0($2)
; MIPS64R6-NEXT: beqzc $9, .LBB13_1
; MIPS64R6-NEXT: .LBB13_3: # %entry
; MIPS64R6-NEXT: srlv $1, $10, $3
; MIPS64R6-NEXT: seb $1, $1
; MIPS64R6-NEXT: # %bb.4: # %entry
; MIPS64R6-NEXT: xor $1, $1, $5
; MIPS64R6-NEXT: jr $ra
; MIPS64R6-NEXT: sltiu $2, $1, 1
;
; MIPS64R6O0-LABEL: AtomicCmpSwapRes8:
; MIPS64R6O0: # %bb.0: # %entry
; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
; MIPS64R6O0-NEXT: # kill: def $a2 killed $a2 killed $a2_64
; MIPS64R6O0-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64R6O0-NEXT: daddiu $1, $zero, -4
; MIPS64R6O0-NEXT: and $1, $4, $1
; MIPS64R6O0-NEXT: andi $2, $4, 3
; MIPS64R6O0-NEXT: xori $2, $2, 3
; MIPS64R6O0-NEXT: sll $2, $2, 3
; MIPS64R6O0-NEXT: ori $3, $zero, 255
; MIPS64R6O0-NEXT: sllv $3, $3, $2
; MIPS64R6O0-NEXT: nor $7, $zero, $3
; MIPS64R6O0-NEXT: andi $8, $5, 255
; MIPS64R6O0-NEXT: sllv $8, $8, $2
; MIPS64R6O0-NEXT: andi $6, $6, 255
; MIPS64R6O0-NEXT: sllv $6, $6, $2
; MIPS64R6O0-NEXT: .LBB13_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6O0-NEXT: ll $10, 0($1)
; MIPS64R6O0-NEXT: and $11, $10, $3
; MIPS64R6O0-NEXT: bnec $11, $8, .LBB13_3
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1
; MIPS64R6O0-NEXT: and $10, $10, $7
; MIPS64R6O0-NEXT: or $10, $10, $6
; MIPS64R6O0-NEXT: sc $10, 0($1)
; MIPS64R6O0-NEXT: beqzc $10, .LBB13_1
; MIPS64R6O0-NEXT: .LBB13_3: # %entry
; MIPS64R6O0-NEXT: srlv $9, $11, $2
; MIPS64R6O0-NEXT: seb $9, $9
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: sw $5, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: sw $9, 8($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.5: # %entry
; MIPS64R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: xor $1, $1, $2
; MIPS64R6O0-NEXT: sltiu $2, $1, 1
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: AtomicCmpSwapRes8:
; MM32: # %bb.0: # %entry
; MM32-NEXT: addiu $1, $zero, -4
; MM32-NEXT: and $2, $4, $1
; MM32-NEXT: andi $1, $4, 3
; MM32-NEXT: sll $3, $1, 3
; MM32-NEXT: ori $1, $zero, 255
; MM32-NEXT: sllv $4, $1, $3
; MM32-NEXT: nor $7, $zero, $4
; MM32-NEXT: andi $1, $5, 255
; MM32-NEXT: sllv $8, $1, $3
; MM32-NEXT: andi $1, $6, 255
; MM32-NEXT: sllv $6, $1, $3
; MM32-NEXT: $BB13_1: # %entry
; MM32-NEXT: # =>This Inner Loop Header: Depth=1
; MM32-NEXT: ll $9, 0($2)
; MM32-NEXT: and $10, $9, $4
; MM32-NEXT: bne $10, $8, $BB13_3
; MM32-NEXT: nop
; MM32-NEXT: # %bb.2: # %entry
; MM32-NEXT: # in Loop: Header=BB13_1 Depth=1
; MM32-NEXT: and $9, $9, $7
; MM32-NEXT: or $9, $9, $6
; MM32-NEXT: sc $9, 0($2)
; MM32-NEXT: beqzc $9, $BB13_1
; MM32-NEXT: $BB13_3: # %entry
; MM32-NEXT: srlv $1, $10, $3
; MM32-NEXT: seb $1, $1
; MM32-NEXT: # %bb.4: # %entry
; MM32-NEXT: xor $1, $1, $5
; MM32-NEXT: jr $ra
; MM32-NEXT: sltiu $2, $1, 1
;
; O1-LABEL: AtomicCmpSwapRes8:
; O1: # %bb.0: # %entry
; O1-NEXT: addiu $1, $zero, -4
; O1-NEXT: and $2, $4, $1
; O1-NEXT: andi $1, $4, 3
; O1-NEXT: sll $3, $1, 3
; O1-NEXT: ori $1, $zero, 255
; O1-NEXT: sllv $4, $1, $3
; O1-NEXT: nor $7, $zero, $4
; O1-NEXT: andi $1, $5, 255
; O1-NEXT: sllv $8, $1, $3
; O1-NEXT: andi $1, $6, 255
; O1-NEXT: sllv $6, $1, $3
; O1-NEXT: $BB13_1: # %entry
; O1-NEXT: # =>This Inner Loop Header: Depth=1
; O1-NEXT: ll $9, 0($2)
; O1-NEXT: and $10, $9, $4
; O1-NEXT: bne $10, $8, $BB13_3
; O1-NEXT: nop
; O1-NEXT: # %bb.2: # %entry
; O1-NEXT: # in Loop: Header=BB13_1 Depth=1
; O1-NEXT: and $9, $9, $7
; O1-NEXT: or $9, $9, $6
; O1-NEXT: sc $9, 0($2)
; O1-NEXT: beqz $9, $BB13_1
; O1-NEXT: nop
; O1-NEXT: $BB13_3: # %entry
; O1-NEXT: srlv $1, $10, $3
; O1-NEXT: sll $1, $1, 24
; O1-NEXT: sra $1, $1, 24
; O1-NEXT: # %bb.4: # %entry
; O1-NEXT: sll $2, $5, 24
; O1-NEXT: sra $2, $2, 24
; O1-NEXT: xor $1, $1, $2
; O1-NEXT: jr $ra
; O1-NEXT: sltiu $2, $1, 1
;
; O2-LABEL: AtomicCmpSwapRes8:
; O2: # %bb.0: # %entry
; O2-NEXT: addiu $1, $zero, -4
; O2-NEXT: and $2, $4, $1
; O2-NEXT: andi $1, $4, 3
; O2-NEXT: sll $3, $1, 3
; O2-NEXT: ori $1, $zero, 255
; O2-NEXT: sllv $4, $1, $3
; O2-NEXT: nor $7, $zero, $4
; O2-NEXT: andi $1, $5, 255
; O2-NEXT: sllv $8, $1, $3
; O2-NEXT: andi $1, $6, 255
; O2-NEXT: sllv $6, $1, $3
; O2-NEXT: $BB13_1: # %entry
; O2-NEXT: # =>This Inner Loop Header: Depth=1
; O2-NEXT: ll $9, 0($2)
; O2-NEXT: and $10, $9, $4
; O2-NEXT: bne $10, $8, $BB13_3
; O2-NEXT: nop
; O2-NEXT: # %bb.2: # %entry
; O2-NEXT: # in Loop: Header=BB13_1 Depth=1
; O2-NEXT: and $9, $9, $7
; O2-NEXT: or $9, $9, $6
; O2-NEXT: sc $9, 0($2)
; O2-NEXT: beqz $9, $BB13_1
; O2-NEXT: nop
; O2-NEXT: $BB13_3: # %entry
; O2-NEXT: srlv $1, $10, $3
; O2-NEXT: sll $1, $1, 24
; O2-NEXT: sra $1, $1, 24
; O2-NEXT: # %bb.4: # %entry
; O2-NEXT: sll $2, $5, 24
; O2-NEXT: sra $2, $2, 24
; O2-NEXT: xor $1, $1, $2
; O2-NEXT: jr $ra
; O2-NEXT: sltiu $2, $1, 1
;
; O3-LABEL: AtomicCmpSwapRes8:
; O3: # %bb.0: # %entry
; O3-NEXT: addiu $1, $zero, -4
; O3-NEXT: and $2, $4, $1
; O3-NEXT: andi $1, $4, 3
; O3-NEXT: sll $3, $1, 3
; O3-NEXT: ori $1, $zero, 255
; O3-NEXT: sllv $4, $1, $3
; O3-NEXT: andi $1, $5, 255
; O3-NEXT: sllv $8, $1, $3
; O3-NEXT: andi $1, $6, 255
; O3-NEXT: nor $7, $zero, $4
; O3-NEXT: sllv $6, $1, $3
; O3-NEXT: $BB13_1: # %entry
; O3-NEXT: # =>This Inner Loop Header: Depth=1
; O3-NEXT: ll $9, 0($2)
; O3-NEXT: and $10, $9, $4
; O3-NEXT: bne $10, $8, $BB13_3
; O3-NEXT: nop
; O3-NEXT: # %bb.2: # %entry
; O3-NEXT: # in Loop: Header=BB13_1 Depth=1
; O3-NEXT: and $9, $9, $7
; O3-NEXT: or $9, $9, $6
; O3-NEXT: sc $9, 0($2)
; O3-NEXT: beqz $9, $BB13_1
; O3-NEXT: nop
; O3-NEXT: $BB13_3: # %entry
; O3-NEXT: srlv $1, $10, $3
; O3-NEXT: sll $1, $1, 24
; O3-NEXT: sra $1, $1, 24
; O3-NEXT: # %bb.4: # %entry
; O3-NEXT: sll $2, $5, 24
; O3-NEXT: sra $2, $2, 24
; O3-NEXT: xor $1, $1, $2
; O3-NEXT: jr $ra
; O3-NEXT: sltiu $2, $1, 1
;
; MIPS32EB-LABEL: AtomicCmpSwapRes8:
; MIPS32EB: # %bb.0: # %entry
; MIPS32EB-NEXT: addiu $1, $zero, -4
; MIPS32EB-NEXT: and $2, $4, $1
; MIPS32EB-NEXT: andi $1, $4, 3
; MIPS32EB-NEXT: xori $1, $1, 3
; MIPS32EB-NEXT: sll $3, $1, 3
; MIPS32EB-NEXT: ori $1, $zero, 255
; MIPS32EB-NEXT: sllv $4, $1, $3
; MIPS32EB-NEXT: nor $7, $zero, $4
; MIPS32EB-NEXT: andi $1, $5, 255
; MIPS32EB-NEXT: sllv $8, $1, $3
; MIPS32EB-NEXT: andi $1, $6, 255
; MIPS32EB-NEXT: sllv $6, $1, $3
; MIPS32EB-NEXT: $BB13_1: # %entry
; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32EB-NEXT: ll $9, 0($2)
; MIPS32EB-NEXT: and $10, $9, $4
; MIPS32EB-NEXT: bne $10, $8, $BB13_3
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: # %bb.2: # %entry
; MIPS32EB-NEXT: # in Loop: Header=BB13_1 Depth=1
; MIPS32EB-NEXT: and $9, $9, $7
; MIPS32EB-NEXT: or $9, $9, $6
; MIPS32EB-NEXT: sc $9, 0($2)
; MIPS32EB-NEXT: beqz $9, $BB13_1
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: $BB13_3: # %entry
; MIPS32EB-NEXT: srlv $1, $10, $3
; MIPS32EB-NEXT: sll $1, $1, 24
; MIPS32EB-NEXT: sra $1, $1, 24
; MIPS32EB-NEXT: # %bb.4: # %entry
; MIPS32EB-NEXT: sll $2, $5, 24
; MIPS32EB-NEXT: sra $2, $2, 24
; MIPS32EB-NEXT: xor $1, $1, $2
; MIPS32EB-NEXT: jr $ra
; MIPS32EB-NEXT: sltiu $2, $1, 1
entry:
%0 = cmpxchg i8* %ptr, i8 %oldval, i8 %newval monotonic monotonic
%1 = extractvalue { i8, i1 } %0, 1
ret i1 %1
; FIXME: -march=mips produces a redundant sign extension here...
; FIXME: ...Leading to this split check.
}
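; A rough C equivalent of AtomicCmpSwapRes8 follows as a comment, so the
; autogenerated block numbering above stays valid; the function name is
; illustrative, not part of the test. It shows why the tail of each check
; block is just an equality test between two sign-extended i8 values
; (xor, then sltiu ..., 1), and why the redundant extension the FIXMEs
; mention splits the checks at %bb.4.
;
;   #include <stdatomic.h>
;   #include <stdbool.h>
;   bool cmpxchg_res8(_Atomic signed char *p, signed char oldval,
;                     signed char newval) {
;     /* monotonic/monotonic corresponds to relaxed/relaxed */
;     return atomic_compare_exchange_strong_explicit(
;         p, &oldval, newval, memory_order_relaxed, memory_order_relaxed);
;   }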
; Check one i16 so that we cover the seh sign extend
@z = common global i16 0, align 1
define signext i16 @AtomicLoadAdd16(i16 signext %incr) nounwind {
; MIPS32-LABEL: AtomicLoadAdd16:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-NEXT: addu $1, $2, $25
; MIPS32-NEXT: lw $1, %got(z)($1)
; MIPS32-NEXT: addiu $2, $zero, -4
; MIPS32-NEXT: and $2, $1, $2
; MIPS32-NEXT: andi $1, $1, 3
; MIPS32-NEXT: sll $3, $1, 3
; MIPS32-NEXT: ori $1, $zero, 65535
; MIPS32-NEXT: sllv $5, $1, $3
; MIPS32-NEXT: nor $6, $zero, $5
; MIPS32-NEXT: sllv $4, $4, $3
; MIPS32-NEXT: $BB14_1: # %entry
; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32-NEXT: ll $7, 0($2)
; MIPS32-NEXT: addu $8, $7, $4
; MIPS32-NEXT: and $8, $8, $5
; MIPS32-NEXT: and $9, $7, $6
; MIPS32-NEXT: or $9, $9, $8
; MIPS32-NEXT: sc $9, 0($2)
; MIPS32-NEXT: beqz $9, $BB14_1
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %entry
; MIPS32-NEXT: and $1, $7, $5
; MIPS32-NEXT: srlv $1, $1, $3
; MIPS32-NEXT: sll $1, $1, 16
; MIPS32-NEXT: sra $1, $1, 16
; MIPS32-NEXT: # %bb.3: # %entry
; MIPS32-NEXT: sll $1, $1, 16
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: sra $2, $1, 16
;
; MIPS32O0-LABEL: AtomicLoadAdd16:
; MIPS32O0: # %bb.0: # %entry
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addiu $sp, $sp, -8
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: lw $1, %got(z)($1)
; MIPS32O0-NEXT: addiu $2, $zero, -4
; MIPS32O0-NEXT: and $2, $1, $2
; MIPS32O0-NEXT: andi $1, $1, 3
; MIPS32O0-NEXT: sll $1, $1, 3
; MIPS32O0-NEXT: ori $3, $zero, 65535
; MIPS32O0-NEXT: sllv $3, $3, $1
; MIPS32O0-NEXT: nor $5, $zero, $3
; MIPS32O0-NEXT: sllv $4, $4, $1
; MIPS32O0-NEXT: $BB14_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32O0-NEXT: ll $7, 0($2)
; MIPS32O0-NEXT: addu $8, $7, $4
; MIPS32O0-NEXT: and $8, $8, $3
; MIPS32O0-NEXT: and $9, $7, $5
; MIPS32O0-NEXT: or $9, $9, $8
; MIPS32O0-NEXT: sc $9, 0($2)
; MIPS32O0-NEXT: beqz $9, $BB14_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: and $6, $7, $3
; MIPS32O0-NEXT: srlv $6, $6, $1
; MIPS32O0-NEXT: sll $6, $6, 16
; MIPS32O0-NEXT: sra $6, $6, 16
; MIPS32O0-NEXT: # %bb.3: # %entry
; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: # %bb.4: # %entry
; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32O0-NEXT: sll $2, $1, 16
; MIPS32O0-NEXT: sra $2, $2, 16
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
; MIPS32R2-LABEL: AtomicLoadAdd16:
; MIPS32R2: # %bb.0: # %entry
; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R2-NEXT: addu $1, $2, $25
; MIPS32R2-NEXT: lw $1, %got(z)($1)
; MIPS32R2-NEXT: addiu $2, $zero, -4
; MIPS32R2-NEXT: and $2, $1, $2
; MIPS32R2-NEXT: andi $1, $1, 3
; MIPS32R2-NEXT: sll $3, $1, 3
; MIPS32R2-NEXT: ori $1, $zero, 65535
; MIPS32R2-NEXT: sllv $5, $1, $3
; MIPS32R2-NEXT: nor $6, $zero, $5
; MIPS32R2-NEXT: sllv $4, $4, $3
; MIPS32R2-NEXT: $BB14_1: # %entry
; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R2-NEXT: ll $7, 0($2)
; MIPS32R2-NEXT: addu $8, $7, $4
; MIPS32R2-NEXT: and $8, $8, $5
; MIPS32R2-NEXT: and $9, $7, $6
; MIPS32R2-NEXT: or $9, $9, $8
; MIPS32R2-NEXT: sc $9, 0($2)
; MIPS32R2-NEXT: beqz $9, $BB14_1
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: # %bb.2: # %entry
; MIPS32R2-NEXT: and $1, $7, $5
; MIPS32R2-NEXT: srlv $1, $1, $3
; MIPS32R2-NEXT: seh $1, $1
; MIPS32R2-NEXT: # %bb.3: # %entry
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: seh $2, $1
;
; MIPS32R6-LABEL: AtomicLoadAdd16:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-NEXT: addu $1, $2, $25
; MIPS32R6-NEXT: lw $1, %got(z)($1)
; MIPS32R6-NEXT: addiu $2, $zero, -4
; MIPS32R6-NEXT: and $2, $1, $2
; MIPS32R6-NEXT: andi $1, $1, 3
; MIPS32R6-NEXT: sll $3, $1, 3
; MIPS32R6-NEXT: ori $1, $zero, 65535
; MIPS32R6-NEXT: sllv $5, $1, $3
; MIPS32R6-NEXT: nor $6, $zero, $5
; MIPS32R6-NEXT: sllv $4, $4, $3
; MIPS32R6-NEXT: $BB14_1: # %entry
; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6-NEXT: ll $7, 0($2)
; MIPS32R6-NEXT: addu $8, $7, $4
; MIPS32R6-NEXT: and $8, $8, $5
; MIPS32R6-NEXT: and $9, $7, $6
; MIPS32R6-NEXT: or $9, $9, $8
; MIPS32R6-NEXT: sc $9, 0($2)
; MIPS32R6-NEXT: beqzc $9, $BB14_1
; MIPS32R6-NEXT: # %bb.2: # %entry
; MIPS32R6-NEXT: and $1, $7, $5
; MIPS32R6-NEXT: srlv $1, $1, $3
; MIPS32R6-NEXT: seh $1, $1
; MIPS32R6-NEXT: # %bb.3: # %entry
; MIPS32R6-NEXT: jr $ra
; MIPS32R6-NEXT: seh $2, $1
;
; MIPS32R6O0-LABEL: AtomicLoadAdd16:
; MIPS32R6O0: # %bb.0: # %entry
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: move $2, $4
; MIPS32R6O0-NEXT: lw $1, %got(z)($1)
; MIPS32R6O0-NEXT: addiu $3, $zero, -4
; MIPS32R6O0-NEXT: and $3, $1, $3
; MIPS32R6O0-NEXT: andi $1, $1, 3
; MIPS32R6O0-NEXT: sll $1, $1, 3
; MIPS32R6O0-NEXT: ori $5, $zero, 65535
; MIPS32R6O0-NEXT: sllv $5, $5, $1
; MIPS32R6O0-NEXT: nor $6, $zero, $5
; MIPS32R6O0-NEXT: sllv $4, $4, $1
; MIPS32R6O0-NEXT: $BB14_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6O0-NEXT: ll $8, 0($3)
; MIPS32R6O0-NEXT: addu $9, $8, $4
; MIPS32R6O0-NEXT: and $9, $9, $5
; MIPS32R6O0-NEXT: and $10, $8, $6
; MIPS32R6O0-NEXT: or $10, $10, $9
; MIPS32R6O0-NEXT: sc $10, 0($3)
; MIPS32R6O0-NEXT: beqzc $10, $BB14_1
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: and $7, $8, $5
; MIPS32R6O0-NEXT: srlv $7, $7, $1
; MIPS32R6O0-NEXT: seh $7, $7
; MIPS32R6O0-NEXT: # %bb.3: # %entry
; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.4: # %entry
; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: seh $2, $1
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: AtomicLoadAdd16:
; MIPS4: # %bb.0: # %entry
; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16)))
; MIPS4-NEXT: daddu $1, $1, $25
; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16)))
; MIPS4-NEXT: ld $1, %got_disp(z)($1)
; MIPS4-NEXT: daddiu $2, $zero, -4
; MIPS4-NEXT: and $2, $1, $2
; MIPS4-NEXT: andi $1, $1, 3
; MIPS4-NEXT: sll $3, $1, 3
; MIPS4-NEXT: ori $1, $zero, 65535
; MIPS4-NEXT: sllv $5, $1, $3
; MIPS4-NEXT: nor $6, $zero, $5
; MIPS4-NEXT: sllv $4, $4, $3
; MIPS4-NEXT: .LBB14_1: # %entry
; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS4-NEXT: ll $7, 0($2)
; MIPS4-NEXT: addu $8, $7, $4
; MIPS4-NEXT: and $8, $8, $5
; MIPS4-NEXT: and $9, $7, $6
; MIPS4-NEXT: or $9, $9, $8
; MIPS4-NEXT: sc $9, 0($2)
; MIPS4-NEXT: beqz $9, .LBB14_1
; MIPS4-NEXT: nop
; MIPS4-NEXT: # %bb.2: # %entry
; MIPS4-NEXT: and $1, $7, $5
; MIPS4-NEXT: srlv $1, $1, $3
; MIPS4-NEXT: sll $1, $1, 16
; MIPS4-NEXT: sra $1, $1, 16
; MIPS4-NEXT: # %bb.3: # %entry
; MIPS4-NEXT: sll $1, $1, 16
; MIPS4-NEXT: jr $ra
; MIPS4-NEXT: sra $2, $1, 16
;
; MIPS64-LABEL: AtomicLoadAdd16:
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16)))
; MIPS64-NEXT: daddu $1, $1, $25
; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16)))
; MIPS64-NEXT: ld $1, %got_disp(z)($1)
; MIPS64-NEXT: daddiu $2, $zero, -4
; MIPS64-NEXT: and $2, $1, $2
; MIPS64-NEXT: andi $1, $1, 3
; MIPS64-NEXT: sll $3, $1, 3
; MIPS64-NEXT: ori $1, $zero, 65535
; MIPS64-NEXT: sllv $5, $1, $3
; MIPS64-NEXT: nor $6, $zero, $5
; MIPS64-NEXT: sllv $4, $4, $3
; MIPS64-NEXT: .LBB14_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64-NEXT: ll $7, 0($2)
; MIPS64-NEXT: addu $8, $7, $4
; MIPS64-NEXT: and $8, $8, $5
; MIPS64-NEXT: and $9, $7, $6
; MIPS64-NEXT: or $9, $9, $8
; MIPS64-NEXT: sc $9, 0($2)
; MIPS64-NEXT: beqz $9, .LBB14_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
; MIPS64-NEXT: and $1, $7, $5
; MIPS64-NEXT: srlv $1, $1, $3
; MIPS64-NEXT: sll $1, $1, 16
; MIPS64-NEXT: sra $1, $1, 16
; MIPS64-NEXT: # %bb.3: # %entry
; MIPS64-NEXT: sll $1, $1, 16
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: sra $2, $1, 16
;
; MIPS64R2-LABEL: AtomicLoadAdd16:
; MIPS64R2: # %bb.0: # %entry
; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16)))
; MIPS64R2-NEXT: daddu $1, $1, $25
; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16)))
; MIPS64R2-NEXT: ld $1, %got_disp(z)($1)
; MIPS64R2-NEXT: daddiu $2, $zero, -4
; MIPS64R2-NEXT: and $2, $1, $2
; MIPS64R2-NEXT: andi $1, $1, 3
; MIPS64R2-NEXT: sll $3, $1, 3
; MIPS64R2-NEXT: ori $1, $zero, 65535
; MIPS64R2-NEXT: sllv $5, $1, $3
; MIPS64R2-NEXT: nor $6, $zero, $5
; MIPS64R2-NEXT: sllv $4, $4, $3
; MIPS64R2-NEXT: .LBB14_1: # %entry
; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R2-NEXT: ll $7, 0($2)
; MIPS64R2-NEXT: addu $8, $7, $4
; MIPS64R2-NEXT: and $8, $8, $5
; MIPS64R2-NEXT: and $9, $7, $6
; MIPS64R2-NEXT: or $9, $9, $8
; MIPS64R2-NEXT: sc $9, 0($2)
; MIPS64R2-NEXT: beqz $9, .LBB14_1
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: # %bb.2: # %entry
; MIPS64R2-NEXT: and $1, $7, $5
; MIPS64R2-NEXT: srlv $1, $1, $3
; MIPS64R2-NEXT: seh $1, $1
; MIPS64R2-NEXT: # %bb.3: # %entry
; MIPS64R2-NEXT: jr $ra
; MIPS64R2-NEXT: seh $2, $1
;
; MIPS64R6-LABEL: AtomicLoadAdd16:
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16)))
; MIPS64R6-NEXT: daddu $1, $1, $25
; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16)))
; MIPS64R6-NEXT: ld $1, %got_disp(z)($1)
; MIPS64R6-NEXT: daddiu $2, $zero, -4
; MIPS64R6-NEXT: and $2, $1, $2
; MIPS64R6-NEXT: andi $1, $1, 3
; MIPS64R6-NEXT: sll $3, $1, 3
; MIPS64R6-NEXT: ori $1, $zero, 65535
; MIPS64R6-NEXT: sllv $5, $1, $3
; MIPS64R6-NEXT: nor $6, $zero, $5
; MIPS64R6-NEXT: sllv $4, $4, $3
; MIPS64R6-NEXT: .LBB14_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6-NEXT: ll $7, 0($2)
; MIPS64R6-NEXT: addu $8, $7, $4
; MIPS64R6-NEXT: and $8, $8, $5
; MIPS64R6-NEXT: and $9, $7, $6
; MIPS64R6-NEXT: or $9, $9, $8
; MIPS64R6-NEXT: sc $9, 0($2)
; MIPS64R6-NEXT: beqzc $9, .LBB14_1
; MIPS64R6-NEXT: # %bb.2: # %entry
; MIPS64R6-NEXT: and $1, $7, $5
; MIPS64R6-NEXT: srlv $1, $1, $3
; MIPS64R6-NEXT: seh $1, $1
; MIPS64R6-NEXT: # %bb.3: # %entry
; MIPS64R6-NEXT: jr $ra
; MIPS64R6-NEXT: seh $2, $1
;
; MIPS64R6O0-LABEL: AtomicLoadAdd16:
; MIPS64R6O0: # %bb.0: # %entry
; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
; MIPS64R6O0-NEXT: ld $1, %got_disp(z)($1)
; MIPS64R6O0-NEXT: daddiu $2, $zero, -4
; MIPS64R6O0-NEXT: and $2, $1, $2
; MIPS64R6O0-NEXT: andi $3, $1, 3
; MIPS64R6O0-NEXT: xori $3, $3, 2
; MIPS64R6O0-NEXT: sll $3, $3, 3
; MIPS64R6O0-NEXT: ori $5, $zero, 65535
; MIPS64R6O0-NEXT: sllv $5, $5, $3
; MIPS64R6O0-NEXT: nor $6, $zero, $5
; MIPS64R6O0-NEXT: sllv $4, $4, $3
; MIPS64R6O0-NEXT: .LBB14_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6O0-NEXT: ll $8, 0($2)
; MIPS64R6O0-NEXT: addu $9, $8, $4
; MIPS64R6O0-NEXT: and $9, $9, $5
; MIPS64R6O0-NEXT: and $10, $8, $6
; MIPS64R6O0-NEXT: or $10, $10, $9
; MIPS64R6O0-NEXT: sc $10, 0($2)
; MIPS64R6O0-NEXT: beqzc $10, .LBB14_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: and $7, $8, $5
; MIPS64R6O0-NEXT: srlv $7, $7, $3
; MIPS64R6O0-NEXT: seh $7, $7
; MIPS64R6O0-NEXT: # %bb.3: # %entry
; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.4: # %entry
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seh $2, $1
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: AtomicLoadAdd16:
; MM32: # %bb.0: # %entry
; MM32-NEXT: lui $2, %hi(_gp_disp)
; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MM32-NEXT: addu $2, $2, $25
; MM32-NEXT: lw $1, %got(z)($2)
; MM32-NEXT: addiu $2, $zero, -4
; MM32-NEXT: and $2, $1, $2
; MM32-NEXT: andi $1, $1, 3
; MM32-NEXT: sll $3, $1, 3
; MM32-NEXT: ori $1, $zero, 65535
; MM32-NEXT: sllv $5, $1, $3
; MM32-NEXT: nor $6, $zero, $5
; MM32-NEXT: sllv $4, $4, $3
; MM32-NEXT: $BB14_1: # %entry
; MM32-NEXT: # =>This Inner Loop Header: Depth=1
; MM32-NEXT: ll $7, 0($2)
; MM32-NEXT: addu $8, $7, $4
; MM32-NEXT: and $8, $8, $5
; MM32-NEXT: and $9, $7, $6
; MM32-NEXT: or $9, $9, $8
; MM32-NEXT: sc $9, 0($2)
; MM32-NEXT: beqzc $9, $BB14_1
; MM32-NEXT: # %bb.2: # %entry
; MM32-NEXT: and $1, $7, $5
; MM32-NEXT: srlv $1, $1, $3
; MM32-NEXT: seh $1, $1
; MM32-NEXT: # %bb.3: # %entry
; MM32-NEXT: jr $ra
; MM32-NEXT: seh $2, $1
;
; O1-LABEL: AtomicLoadAdd16:
; O1: # %bb.0: # %entry
; O1-NEXT: lui $2, %hi(_gp_disp)
; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
; O1-NEXT: addu $1, $2, $25
; O1-NEXT: lw $1, %got(z)($1)
; O1-NEXT: addiu $2, $zero, -4
; O1-NEXT: and $2, $1, $2
; O1-NEXT: andi $1, $1, 3
; O1-NEXT: sll $3, $1, 3
; O1-NEXT: ori $1, $zero, 65535
; O1-NEXT: sllv $5, $1, $3
; O1-NEXT: nor $6, $zero, $5
; O1-NEXT: sllv $4, $4, $3
; O1-NEXT: $BB14_1: # %entry
; O1-NEXT: # =>This Inner Loop Header: Depth=1
; O1-NEXT: ll $7, 0($2)
; O1-NEXT: addu $8, $7, $4
; O1-NEXT: and $8, $8, $5
; O1-NEXT: and $9, $7, $6
; O1-NEXT: or $9, $9, $8
; O1-NEXT: sc $9, 0($2)
; O1-NEXT: beqz $9, $BB14_1
; O1-NEXT: nop
; O1-NEXT: # %bb.2: # %entry
; O1-NEXT: and $1, $7, $5
; O1-NEXT: srlv $1, $1, $3
; O1-NEXT: sll $1, $1, 16
; O1-NEXT: sra $1, $1, 16
; O1-NEXT: # %bb.3: # %entry
; O1-NEXT: sll $1, $1, 16
; O1-NEXT: jr $ra
; O1-NEXT: sra $2, $1, 16
;
; O2-LABEL: AtomicLoadAdd16:
; O2: # %bb.0: # %entry
; O2-NEXT: lui $2, %hi(_gp_disp)
; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
; O2-NEXT: addu $1, $2, $25
; O2-NEXT: lw $1, %got(z)($1)
; O2-NEXT: addiu $2, $zero, -4
; O2-NEXT: and $2, $1, $2
; O2-NEXT: andi $1, $1, 3
; O2-NEXT: sll $3, $1, 3
; O2-NEXT: ori $1, $zero, 65535
; O2-NEXT: sllv $5, $1, $3
; O2-NEXT: nor $6, $zero, $5
; O2-NEXT: sllv $4, $4, $3
; O2-NEXT: $BB14_1: # %entry
; O2-NEXT: # =>This Inner Loop Header: Depth=1
; O2-NEXT: ll $7, 0($2)
; O2-NEXT: addu $8, $7, $4
; O2-NEXT: and $8, $8, $5
; O2-NEXT: and $9, $7, $6
; O2-NEXT: or $9, $9, $8
; O2-NEXT: sc $9, 0($2)
; O2-NEXT: beqz $9, $BB14_1
; O2-NEXT: nop
; O2-NEXT: # %bb.2: # %entry
; O2-NEXT: and $1, $7, $5
; O2-NEXT: srlv $1, $1, $3
; O2-NEXT: sll $1, $1, 16
; O2-NEXT: sra $1, $1, 16
; O2-NEXT: # %bb.3: # %entry
; O2-NEXT: sll $1, $1, 16
; O2-NEXT: jr $ra
; O2-NEXT: sra $2, $1, 16
;
; O3-LABEL: AtomicLoadAdd16:
; O3: # %bb.0: # %entry
; O3-NEXT: lui $2, %hi(_gp_disp)
; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
; O3-NEXT: addu $1, $2, $25
; O3-NEXT: addiu $2, $zero, -4
; O3-NEXT: lw $1, %got(z)($1)
; O3-NEXT: and $2, $1, $2
; O3-NEXT: andi $1, $1, 3
; O3-NEXT: sll $3, $1, 3
; O3-NEXT: ori $1, $zero, 65535
; O3-NEXT: sllv $5, $1, $3
; O3-NEXT: sllv $4, $4, $3
; O3-NEXT: nor $6, $zero, $5
; O3-NEXT: $BB14_1: # %entry
; O3-NEXT: # =>This Inner Loop Header: Depth=1
; O3-NEXT: ll $7, 0($2)
; O3-NEXT: addu $8, $7, $4
; O3-NEXT: and $8, $8, $5
; O3-NEXT: and $9, $7, $6
; O3-NEXT: or $9, $9, $8
; O3-NEXT: sc $9, 0($2)
; O3-NEXT: beqz $9, $BB14_1
; O3-NEXT: nop
; O3-NEXT: # %bb.2: # %entry
; O3-NEXT: and $1, $7, $5
; O3-NEXT: srlv $1, $1, $3
; O3-NEXT: sll $1, $1, 16
; O3-NEXT: sra $1, $1, 16
; O3-NEXT: # %bb.3: # %entry
; O3-NEXT: sll $1, $1, 16
; O3-NEXT: jr $ra
; O3-NEXT: sra $2, $1, 16
;
; MIPS32EB-LABEL: AtomicLoadAdd16:
; MIPS32EB: # %bb.0: # %entry
; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32EB-NEXT: addu $1, $2, $25
; MIPS32EB-NEXT: lw $1, %got(z)($1)
; MIPS32EB-NEXT: addiu $2, $zero, -4
; MIPS32EB-NEXT: and $2, $1, $2
; MIPS32EB-NEXT: andi $1, $1, 3
; MIPS32EB-NEXT: xori $1, $1, 2
; MIPS32EB-NEXT: sll $3, $1, 3
; MIPS32EB-NEXT: ori $1, $zero, 65535
; MIPS32EB-NEXT: sllv $5, $1, $3
; MIPS32EB-NEXT: nor $6, $zero, $5
; MIPS32EB-NEXT: sllv $4, $4, $3
; MIPS32EB-NEXT: $BB14_1: # %entry
; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32EB-NEXT: ll $7, 0($2)
; MIPS32EB-NEXT: addu $8, $7, $4
; MIPS32EB-NEXT: and $8, $8, $5
; MIPS32EB-NEXT: and $9, $7, $6
; MIPS32EB-NEXT: or $9, $9, $8
; MIPS32EB-NEXT: sc $9, 0($2)
; MIPS32EB-NEXT: beqz $9, $BB14_1
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: # %bb.2: # %entry
; MIPS32EB-NEXT: and $1, $7, $5
; MIPS32EB-NEXT: srlv $1, $1, $3
; MIPS32EB-NEXT: sll $1, $1, 16
; MIPS32EB-NEXT: sra $1, $1, 16
; MIPS32EB-NEXT: # %bb.3: # %entry
; MIPS32EB-NEXT: sll $1, $1, 16
; MIPS32EB-NEXT: jr $ra
; MIPS32EB-NEXT: sra $2, $1, 16
entry:
%0 = atomicrmw add i16* @z, i16 %incr monotonic
ret i16 %0
}
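; The i16 atomicrmw above cannot be done natively, so it is expanded to a
; word-sized ll/sc loop over a masked 16-bit field. A hedged C sketch of
; that expansion follows (atomic_add_i16_sketch is an illustrative name,
; not LLVM API; the little-endian shift is shown, while big-endian first
; flips the byte offset with xori, as the MIPS32EB checks do):
;
;   #include <stdint.h>
;   int16_t atomic_add_i16_sketch(int16_t *p, int16_t incr) {
;     uintptr_t a = (uintptr_t)p;
;     uint32_t *word = (uint32_t *)(a & ~(uintptr_t)3); /* addiu -4; and */
;     unsigned shift = (unsigned)(a & 3) * 8;           /* andi 3; sll 3 */
;     uint32_t mask = 0xFFFFu << shift;                 /* ori; sllv     */
;     uint32_t old, upd;
;     do {
;       old = *word;                                    /* ll            */
;       upd = (old + ((uint32_t)(uint16_t)incr << shift)) & mask;
;       upd |= old & ~mask;                             /* nor; and; or  */
;     } while (!__atomic_compare_exchange_n(word, &old, upd, 1,
;                  __ATOMIC_RELAXED, __ATOMIC_RELAXED)); /* sc; beqz     */
;     return (int16_t)((old & mask) >> shift);          /* srlv; seh     */
;   }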
; Test that the i16 return value from cmpxchg is recognised as signed,
; so that setCC doesn't end up comparing an unsigned value to a signed
; value.
; The rest of the functions here are testing the atomic expansion, so
; we just match the end of the function.
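; A hedged C sketch of that hazard (names are illustrative): both the
; loaded field and the expected value must be sign-extended the same way
; (seh on R2/R6, sll+sra by 16 on older cores) before the equality test
; that produces the i1 result.
;
;   #include <stdint.h>
;   #include <stdbool.h>
;   bool success_bit(uint32_t loaded_word, uint32_t mask, unsigned shift,
;                    int16_t expected) {
;     int16_t loaded = (int16_t)((loaded_word & mask) >> shift); /* srlv; seh */
;     return (int32_t)loaded == (int32_t)expected;  /* xor; sltiu ..., 1 */
;   }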
define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) {
; MIPS32-LABEL: foo:
; MIPS32: # %bb.0:
; MIPS32-NEXT: addu $1, $5, $6
; MIPS32-NEXT: sync
; MIPS32-NEXT: addiu $2, $zero, -4
; MIPS32-NEXT: and $3, $4, $2
; MIPS32-NEXT: andi $2, $4, 3
; MIPS32-NEXT: sll $4, $2, 3
; MIPS32-NEXT: ori $2, $zero, 65535
; MIPS32-NEXT: sllv $5, $2, $4
; MIPS32-NEXT: nor $6, $zero, $5
; MIPS32-NEXT: andi $2, $1, 65535
; MIPS32-NEXT: sllv $8, $2, $4
; MIPS32-NEXT: andi $2, $7, 65535
; MIPS32-NEXT: sllv $7, $2, $4
; MIPS32-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1
; MIPS32-NEXT: ll $9, 0($3)
; MIPS32-NEXT: and $10, $9, $5
; MIPS32-NEXT: bne $10, $8, $BB15_3
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
; MIPS32-NEXT: and $9, $9, $6
; MIPS32-NEXT: or $9, $9, $7
; MIPS32-NEXT: sc $9, 0($3)
; MIPS32-NEXT: beqz $9, $BB15_1
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB15_3:
; MIPS32-NEXT: srlv $2, $10, $4
; MIPS32-NEXT: sll $2, $2, 16
; MIPS32-NEXT: sra $2, $2, 16
; MIPS32-NEXT: # %bb.4:
; MIPS32-NEXT: sll $1, $1, 16
; MIPS32-NEXT: sra $1, $1, 16
; MIPS32-NEXT: xor $1, $2, $1
; MIPS32-NEXT: sltiu $3, $1, 1
; MIPS32-NEXT: sync
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32O0-LABEL: foo:
; MIPS32O0: # %bb.0:
; MIPS32O0-NEXT: addiu $sp, $sp, -8
; MIPS32O0-NEXT: .cfi_def_cfa_offset 8
; MIPS32O0-NEXT: addu $1, $5, $6
; MIPS32O0-NEXT: sync
; MIPS32O0-NEXT: addiu $2, $zero, -4
; MIPS32O0-NEXT: and $2, $4, $2
; MIPS32O0-NEXT: andi $3, $4, 3
; MIPS32O0-NEXT: sll $3, $3, 3
; MIPS32O0-NEXT: ori $4, $zero, 65535
; MIPS32O0-NEXT: sllv $4, $4, $3
; MIPS32O0-NEXT: nor $5, $zero, $4
; MIPS32O0-NEXT: andi $6, $1, 65535
; MIPS32O0-NEXT: sllv $6, $6, $3
; MIPS32O0-NEXT: andi $7, $7, 65535
; MIPS32O0-NEXT: sllv $7, $7, $3
; MIPS32O0-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1
; MIPS32O0-NEXT: ll $9, 0($2)
; MIPS32O0-NEXT: and $10, $9, $4
; MIPS32O0-NEXT: bne $10, $6, $BB15_3
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
; MIPS32O0-NEXT: and $9, $9, $5
; MIPS32O0-NEXT: or $9, $9, $7
; MIPS32O0-NEXT: sc $9, 0($2)
; MIPS32O0-NEXT: beqz $9, $BB15_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: $BB15_3:
; MIPS32O0-NEXT: srlv $8, $10, $3
; MIPS32O0-NEXT: sll $8, $8, 16
; MIPS32O0-NEXT: sra $8, $8, 16
; MIPS32O0-NEXT: # %bb.4:
; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: sw $8, 0($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: # %bb.5:
; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32O0-NEXT: sll $2, $1, 16
; MIPS32O0-NEXT: sra $2, $2, 16
; MIPS32O0-NEXT: lw $3, 0($sp) # 4-byte Folded Reload
; MIPS32O0-NEXT: xor $2, $3, $2
; MIPS32O0-NEXT: sltiu $3, $2, 1
; MIPS32O0-NEXT: sync
; MIPS32O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
; MIPS32R2-LABEL: foo:
; MIPS32R2: # %bb.0:
; MIPS32R2-NEXT: addu $1, $5, $6
; MIPS32R2-NEXT: sync
; MIPS32R2-NEXT: addiu $2, $zero, -4
; MIPS32R2-NEXT: and $3, $4, $2
; MIPS32R2-NEXT: andi $2, $4, 3
; MIPS32R2-NEXT: sll $4, $2, 3
; MIPS32R2-NEXT: ori $2, $zero, 65535
; MIPS32R2-NEXT: sllv $5, $2, $4
; MIPS32R2-NEXT: nor $6, $zero, $5
; MIPS32R2-NEXT: andi $2, $1, 65535
; MIPS32R2-NEXT: sllv $8, $2, $4
; MIPS32R2-NEXT: andi $2, $7, 65535
; MIPS32R2-NEXT: sllv $7, $2, $4
; MIPS32R2-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1
; MIPS32R2-NEXT: ll $9, 0($3)
; MIPS32R2-NEXT: and $10, $9, $5
; MIPS32R2-NEXT: bne $10, $8, $BB15_3
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
; MIPS32R2-NEXT: and $9, $9, $6
; MIPS32R2-NEXT: or $9, $9, $7
; MIPS32R2-NEXT: sc $9, 0($3)
; MIPS32R2-NEXT: beqz $9, $BB15_1
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: $BB15_3:
; MIPS32R2-NEXT: srlv $2, $10, $4
; MIPS32R2-NEXT: seh $2, $2
; MIPS32R2-NEXT: # %bb.4:
; MIPS32R2-NEXT: seh $1, $1
; MIPS32R2-NEXT: xor $1, $2, $1
; MIPS32R2-NEXT: sltiu $3, $1, 1
; MIPS32R2-NEXT: sync
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: nop
;
; MIPS32R6-LABEL: foo:
; MIPS32R6: # %bb.0:
; MIPS32R6-NEXT: addu $1, $5, $6
; MIPS32R6-NEXT: sync
; MIPS32R6-NEXT: addiu $2, $zero, -4
; MIPS32R6-NEXT: and $3, $4, $2
; MIPS32R6-NEXT: andi $2, $4, 3
; MIPS32R6-NEXT: sll $4, $2, 3
; MIPS32R6-NEXT: ori $2, $zero, 65535
; MIPS32R6-NEXT: sllv $5, $2, $4
; MIPS32R6-NEXT: nor $6, $zero, $5
; MIPS32R6-NEXT: andi $2, $1, 65535
; MIPS32R6-NEXT: sllv $8, $2, $4
; MIPS32R6-NEXT: andi $2, $7, 65535
; MIPS32R6-NEXT: sllv $7, $2, $4
; MIPS32R6-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1
; MIPS32R6-NEXT: ll $9, 0($3)
; MIPS32R6-NEXT: and $10, $9, $5
; MIPS32R6-NEXT: bnec $10, $8, $BB15_3
; MIPS32R6-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
; MIPS32R6-NEXT: and $9, $9, $6
; MIPS32R6-NEXT: or $9, $9, $7
; MIPS32R6-NEXT: sc $9, 0($3)
; MIPS32R6-NEXT: beqzc $9, $BB15_1
; MIPS32R6-NEXT: $BB15_3:
; MIPS32R6-NEXT: srlv $2, $10, $4
; MIPS32R6-NEXT: seh $2, $2
; MIPS32R6-NEXT: # %bb.4:
; MIPS32R6-NEXT: seh $1, $1
; MIPS32R6-NEXT: xor $1, $2, $1
; MIPS32R6-NEXT: sltiu $3, $1, 1
; MIPS32R6-NEXT: sync
; MIPS32R6-NEXT: jrc $ra
;
; MIPS32R6O0-LABEL: foo:
; MIPS32R6O0: # %bb.0:
; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: .cfi_def_cfa_offset 8
; MIPS32R6O0-NEXT: move $1, $7
; MIPS32R6O0-NEXT: move $2, $6
; MIPS32R6O0-NEXT: move $3, $5
; MIPS32R6O0-NEXT: addu $5, $5, $6
; MIPS32R6O0-NEXT: sync
; MIPS32R6O0-NEXT: addiu $6, $zero, -4
; MIPS32R6O0-NEXT: and $6, $4, $6
; MIPS32R6O0-NEXT: andi $4, $4, 3
; MIPS32R6O0-NEXT: sll $4, $4, 3
; MIPS32R6O0-NEXT: ori $8, $zero, 65535
; MIPS32R6O0-NEXT: sllv $8, $8, $4
; MIPS32R6O0-NEXT: nor $9, $zero, $8
; MIPS32R6O0-NEXT: andi $10, $5, 65535
; MIPS32R6O0-NEXT: sllv $10, $10, $4
; MIPS32R6O0-NEXT: andi $7, $7, 65535
; MIPS32R6O0-NEXT: sllv $7, $7, $4
; MIPS32R6O0-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1
; MIPS32R6O0-NEXT: ll $12, 0($6)
; MIPS32R6O0-NEXT: and $13, $12, $8
; MIPS32R6O0-NEXT: bnec $13, $10, $BB15_3
; MIPS32R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
; MIPS32R6O0-NEXT: and $12, $12, $9
; MIPS32R6O0-NEXT: or $12, $12, $7
; MIPS32R6O0-NEXT: sc $12, 0($6)
; MIPS32R6O0-NEXT: beqzc $12, $BB15_1
; MIPS32R6O0-NEXT: $BB15_3:
; MIPS32R6O0-NEXT: srlv $11, $13, $4
; MIPS32R6O0-NEXT: seh $11, $11
; MIPS32R6O0-NEXT: # %bb.4:
; MIPS32R6O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: sw $11, 0($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.5:
; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: seh $2, $1
; MIPS32R6O0-NEXT: lw $3, 0($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: xor $2, $3, $2
; MIPS32R6O0-NEXT: sltiu $3, $2, 1
; MIPS32R6O0-NEXT: sync
; MIPS32R6O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: foo:
; MIPS4: # %bb.0:
; MIPS4-NEXT: sll $1, $6, 0
; MIPS4-NEXT: sll $2, $5, 0
; MIPS4-NEXT: addu $1, $2, $1
; MIPS4-NEXT: sync
; MIPS4-NEXT: sll $2, $7, 0
; MIPS4-NEXT: daddiu $3, $zero, -4
; MIPS4-NEXT: and $3, $4, $3
; MIPS4-NEXT: andi $4, $4, 3
; MIPS4-NEXT: sll $4, $4, 3
; MIPS4-NEXT: ori $5, $zero, 65535
; MIPS4-NEXT: sllv $5, $5, $4
; MIPS4-NEXT: nor $6, $zero, $5
; MIPS4-NEXT: andi $7, $1, 65535
; MIPS4-NEXT: sllv $7, $7, $4
; MIPS4-NEXT: andi $2, $2, 65535
; MIPS4-NEXT: sllv $8, $2, $4
; MIPS4-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
; MIPS4-NEXT: ll $9, 0($3)
; MIPS4-NEXT: and $10, $9, $5
; MIPS4-NEXT: bne $10, $7, .LBB15_3
; MIPS4-NEXT: nop
; MIPS4-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
; MIPS4-NEXT: and $9, $9, $6
; MIPS4-NEXT: or $9, $9, $8
; MIPS4-NEXT: sc $9, 0($3)
; MIPS4-NEXT: beqz $9, .LBB15_1
; MIPS4-NEXT: nop
; MIPS4-NEXT: .LBB15_3:
; MIPS4-NEXT: srlv $2, $10, $4
; MIPS4-NEXT: sll $2, $2, 16
; MIPS4-NEXT: sra $2, $2, 16
; MIPS4-NEXT: # %bb.4:
; MIPS4-NEXT: sll $1, $1, 16
; MIPS4-NEXT: sra $1, $1, 16
; MIPS4-NEXT: xor $1, $2, $1
; MIPS4-NEXT: sltiu $3, $1, 1
; MIPS4-NEXT: sync
; MIPS4-NEXT: jr $ra
; MIPS4-NEXT: nop
;
; MIPS64-LABEL: foo:
; MIPS64: # %bb.0:
; MIPS64-NEXT: sll $1, $6, 0
; MIPS64-NEXT: sll $2, $5, 0
; MIPS64-NEXT: addu $1, $2, $1
; MIPS64-NEXT: sync
; MIPS64-NEXT: sll $2, $7, 0
; MIPS64-NEXT: daddiu $3, $zero, -4
; MIPS64-NEXT: and $3, $4, $3
; MIPS64-NEXT: andi $4, $4, 3
; MIPS64-NEXT: sll $4, $4, 3
; MIPS64-NEXT: ori $5, $zero, 65535
; MIPS64-NEXT: sllv $5, $5, $4
; MIPS64-NEXT: nor $6, $zero, $5
; MIPS64-NEXT: andi $7, $1, 65535
; MIPS64-NEXT: sllv $7, $7, $4
; MIPS64-NEXT: andi $2, $2, 65535
; MIPS64-NEXT: sllv $8, $2, $4
; MIPS64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
; MIPS64-NEXT: ll $9, 0($3)
; MIPS64-NEXT: and $10, $9, $5
; MIPS64-NEXT: bne $10, $7, .LBB15_3
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
; MIPS64-NEXT: and $9, $9, $6
; MIPS64-NEXT: or $9, $9, $8
; MIPS64-NEXT: sc $9, 0($3)
; MIPS64-NEXT: beqz $9, .LBB15_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: .LBB15_3:
; MIPS64-NEXT: srlv $2, $10, $4
; MIPS64-NEXT: sll $2, $2, 16
; MIPS64-NEXT: sra $2, $2, 16
; MIPS64-NEXT: # %bb.4:
; MIPS64-NEXT: sll $1, $1, 16
; MIPS64-NEXT: sra $1, $1, 16
; MIPS64-NEXT: xor $1, $2, $1
; MIPS64-NEXT: sltiu $3, $1, 1
; MIPS64-NEXT: sync
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: nop
;
; MIPS64R2-LABEL: foo:
; MIPS64R2: # %bb.0:
; MIPS64R2-NEXT: sll $1, $6, 0
; MIPS64R2-NEXT: sll $2, $5, 0
; MIPS64R2-NEXT: addu $1, $2, $1
; MIPS64R2-NEXT: sync
; MIPS64R2-NEXT: sll $2, $7, 0
; MIPS64R2-NEXT: daddiu $3, $zero, -4
; MIPS64R2-NEXT: and $3, $4, $3
; MIPS64R2-NEXT: andi $4, $4, 3
; MIPS64R2-NEXT: sll $4, $4, 3
; MIPS64R2-NEXT: ori $5, $zero, 65535
; MIPS64R2-NEXT: sllv $5, $5, $4
; MIPS64R2-NEXT: nor $6, $zero, $5
; MIPS64R2-NEXT: andi $7, $1, 65535
; MIPS64R2-NEXT: sllv $7, $7, $4
; MIPS64R2-NEXT: andi $2, $2, 65535
; MIPS64R2-NEXT: sllv $8, $2, $4
; MIPS64R2-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
; MIPS64R2-NEXT: ll $9, 0($3)
; MIPS64R2-NEXT: and $10, $9, $5
; MIPS64R2-NEXT: bne $10, $7, .LBB15_3
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
; MIPS64R2-NEXT: and $9, $9, $6
; MIPS64R2-NEXT: or $9, $9, $8
; MIPS64R2-NEXT: sc $9, 0($3)
; MIPS64R2-NEXT: beqz $9, .LBB15_1
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: .LBB15_3:
; MIPS64R2-NEXT: srlv $2, $10, $4
; MIPS64R2-NEXT: seh $2, $2
; MIPS64R2-NEXT: # %bb.4:
; MIPS64R2-NEXT: seh $1, $1
; MIPS64R2-NEXT: xor $1, $2, $1
; MIPS64R2-NEXT: sltiu $3, $1, 1
; MIPS64R2-NEXT: sync
; MIPS64R2-NEXT: jr $ra
; MIPS64R2-NEXT: nop
;
; MIPS64R6-LABEL: foo:
; MIPS64R6: # %bb.0:
; MIPS64R6-NEXT: sll $1, $6, 0
; MIPS64R6-NEXT: sll $2, $5, 0
; MIPS64R6-NEXT: addu $1, $2, $1
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: sll $2, $7, 0
; MIPS64R6-NEXT: daddiu $3, $zero, -4
; MIPS64R6-NEXT: and $3, $4, $3
; MIPS64R6-NEXT: andi $4, $4, 3
; MIPS64R6-NEXT: sll $4, $4, 3
; MIPS64R6-NEXT: ori $5, $zero, 65535
; MIPS64R6-NEXT: sllv $5, $5, $4
; MIPS64R6-NEXT: nor $6, $zero, $5
; MIPS64R6-NEXT: andi $7, $1, 65535
; MIPS64R6-NEXT: sllv $7, $7, $4
; MIPS64R6-NEXT: andi $2, $2, 65535
; MIPS64R6-NEXT: sllv $8, $2, $4
; MIPS64R6-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
; MIPS64R6-NEXT: ll $9, 0($3)
; MIPS64R6-NEXT: and $10, $9, $5
; MIPS64R6-NEXT: bnec $10, $7, .LBB15_3
; MIPS64R6-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
; MIPS64R6-NEXT: and $9, $9, $6
; MIPS64R6-NEXT: or $9, $9, $8
; MIPS64R6-NEXT: sc $9, 0($3)
; MIPS64R6-NEXT: beqzc $9, .LBB15_1
; MIPS64R6-NEXT: .LBB15_3:
; MIPS64R6-NEXT: srlv $2, $10, $4
; MIPS64R6-NEXT: seh $2, $2
; MIPS64R6-NEXT: # %bb.4:
; MIPS64R6-NEXT: seh $1, $1
; MIPS64R6-NEXT: xor $1, $2, $1
; MIPS64R6-NEXT: sltiu $3, $1, 1
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: jrc $ra
;
; MIPS64R6O0-LABEL: foo:
; MIPS64R6O0: # %bb.0:
; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
; MIPS64R6O0-NEXT: .cfi_def_cfa_offset 16
; MIPS64R6O0-NEXT: # kill: def $a3 killed $a3 killed $a3_64
; MIPS64R6O0-NEXT: sll $1, $7, 0
; MIPS64R6O0-NEXT: # kill: def $a2 killed $a2 killed $a2_64
; MIPS64R6O0-NEXT: sll $2, $6, 0
; MIPS64R6O0-NEXT: # kill: def $a1 killed $a1 killed $a1_64
; MIPS64R6O0-NEXT: sll $3, $5, 0
; MIPS64R6O0-NEXT: addu $2, $3, $2
; MIPS64R6O0-NEXT: sync
; MIPS64R6O0-NEXT: daddiu $8, $zero, -4
; MIPS64R6O0-NEXT: and $8, $4, $8
; MIPS64R6O0-NEXT: andi $3, $4, 3
; MIPS64R6O0-NEXT: xori $3, $3, 2
; MIPS64R6O0-NEXT: sll $3, $3, 3
; MIPS64R6O0-NEXT: ori $5, $zero, 65535
; MIPS64R6O0-NEXT: sllv $5, $5, $3
; MIPS64R6O0-NEXT: nor $6, $zero, $5
; MIPS64R6O0-NEXT: andi $7, $2, 65535
; MIPS64R6O0-NEXT: sllv $7, $7, $3
; MIPS64R6O0-NEXT: andi $1, $1, 65535
; MIPS64R6O0-NEXT: sllv $1, $1, $3
; MIPS64R6O0-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
; MIPS64R6O0-NEXT: ll $10, 0($8)
; MIPS64R6O0-NEXT: and $11, $10, $5
; MIPS64R6O0-NEXT: bnec $11, $7, .LBB15_3
; MIPS64R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
; MIPS64R6O0-NEXT: and $10, $10, $6
; MIPS64R6O0-NEXT: or $10, $10, $1
; MIPS64R6O0-NEXT: sc $10, 0($8)
; MIPS64R6O0-NEXT: beqzc $10, .LBB15_1
; MIPS64R6O0-NEXT: .LBB15_3:
; MIPS64R6O0-NEXT: srlv $9, $11, $3
; MIPS64R6O0-NEXT: seh $9, $9
; MIPS64R6O0-NEXT: # %bb.4:
; MIPS64R6O0-NEXT: sw $2, 12($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: sw $9, 8($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.5:
; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seh $2, $1
; MIPS64R6O0-NEXT: lw $3, 8($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: xor $2, $3, $2
; MIPS64R6O0-NEXT: sltiu $3, $2, 1
; MIPS64R6O0-NEXT: sync
; MIPS64R6O0-NEXT: lw $2, 8($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: foo:
; MM32: # %bb.0:
; MM32-NEXT: addu16 $3, $5, $6
; MM32-NEXT: sync
; MM32-NEXT: addiu $1, $zero, -4
; MM32-NEXT: and $1, $4, $1
; MM32-NEXT: andi $2, $4, 3
; MM32-NEXT: sll $4, $2, 3
; MM32-NEXT: ori $2, $zero, 65535
; MM32-NEXT: sllv $5, $2, $4
; MM32-NEXT: nor $6, $zero, $5
; MM32-NEXT: andi $2, $3, 65535
; MM32-NEXT: sllv $8, $2, $4
; MM32-NEXT: andi $2, $7, 65535
; MM32-NEXT: sllv $7, $2, $4
; MM32-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1
; MM32-NEXT: ll $9, 0($1)
; MM32-NEXT: and $10, $9, $5
; MM32-NEXT: bne $10, $8, $BB15_3
; MM32-NEXT: nop
; MM32-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
; MM32-NEXT: and $9, $9, $6
; MM32-NEXT: or $9, $9, $7
; MM32-NEXT: sc $9, 0($1)
; MM32-NEXT: beqzc $9, $BB15_1
; MM32-NEXT: $BB15_3:
; MM32-NEXT: srlv $2, $10, $4
; MM32-NEXT: seh $2, $2
; MM32-NEXT: # %bb.4:
; MM32-NEXT: seh $1, $3
; MM32-NEXT: xor $1, $2, $1
; MM32-NEXT: sltiu $3, $1, 1
; MM32-NEXT: sync
; MM32-NEXT: jrc $ra
;
; O1-LABEL: foo:
; O1: # %bb.0:
; O1-NEXT: addu $1, $5, $6
; O1-NEXT: sync
; O1-NEXT: addiu $2, $zero, -4
; O1-NEXT: and $3, $4, $2
; O1-NEXT: andi $2, $4, 3
; O1-NEXT: sll $4, $2, 3
; O1-NEXT: ori $2, $zero, 65535
; O1-NEXT: sllv $5, $2, $4
; O1-NEXT: nor $6, $zero, $5
; O1-NEXT: andi $2, $1, 65535
; O1-NEXT: sllv $8, $2, $4
; O1-NEXT: andi $2, $7, 65535
; O1-NEXT: sllv $7, $2, $4
; O1-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1
; O1-NEXT: ll $9, 0($3)
; O1-NEXT: and $10, $9, $5
; O1-NEXT: bne $10, $8, $BB15_3
; O1-NEXT: nop
; O1-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
; O1-NEXT: and $9, $9, $6
; O1-NEXT: or $9, $9, $7
; O1-NEXT: sc $9, 0($3)
; O1-NEXT: beqz $9, $BB15_1
; O1-NEXT: nop
; O1-NEXT: $BB15_3:
; O1-NEXT: srlv $2, $10, $4
; O1-NEXT: sll $2, $2, 16
; O1-NEXT: sra $2, $2, 16
; O1-NEXT: # %bb.4:
; O1-NEXT: sll $1, $1, 16
; O1-NEXT: sra $1, $1, 16
; O1-NEXT: xor $1, $2, $1
; O1-NEXT: sltiu $3, $1, 1
; O1-NEXT: sync
; O1-NEXT: jr $ra
; O1-NEXT: nop
;
; O2-LABEL: foo:
; O2: # %bb.0:
; O2-NEXT: addu $1, $5, $6
; O2-NEXT: sync
; O2-NEXT: addiu $2, $zero, -4
; O2-NEXT: and $3, $4, $2
; O2-NEXT: andi $2, $4, 3
; O2-NEXT: sll $4, $2, 3
; O2-NEXT: ori $2, $zero, 65535
; O2-NEXT: sllv $5, $2, $4
; O2-NEXT: nor $6, $zero, $5
; O2-NEXT: andi $2, $1, 65535
; O2-NEXT: sllv $8, $2, $4
; O2-NEXT: andi $2, $7, 65535
; O2-NEXT: sllv $7, $2, $4
; O2-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1
; O2-NEXT: ll $9, 0($3)
; O2-NEXT: and $10, $9, $5
; O2-NEXT: bne $10, $8, $BB15_3
; O2-NEXT: nop
; O2-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
; O2-NEXT: and $9, $9, $6
; O2-NEXT: or $9, $9, $7
; O2-NEXT: sc $9, 0($3)
; O2-NEXT: beqz $9, $BB15_1
; O2-NEXT: nop
; O2-NEXT: $BB15_3:
; O2-NEXT: srlv $2, $10, $4
; O2-NEXT: sll $2, $2, 16
; O2-NEXT: sra $2, $2, 16
; O2-NEXT: # %bb.4:
; O2-NEXT: sll $1, $1, 16
; O2-NEXT: sra $1, $1, 16
; O2-NEXT: xor $1, $2, $1
; O2-NEXT: sltiu $3, $1, 1
; O2-NEXT: sync
; O2-NEXT: jr $ra
; O2-NEXT: nop
;
; O3-LABEL: foo:
; O3: # %bb.0:
; O3-NEXT: addiu $2, $zero, -4
; O3-NEXT: addu $1, $5, $6
; O3-NEXT: sync
; O3-NEXT: and $3, $4, $2
; O3-NEXT: andi $2, $4, 3
; O3-NEXT: sll $4, $2, 3
; O3-NEXT: ori $2, $zero, 65535
; O3-NEXT: sllv $5, $2, $4
; O3-NEXT: andi $2, $1, 65535
; O3-NEXT: sll $1, $1, 16
; O3-NEXT: sllv $8, $2, $4
; O3-NEXT: andi $2, $7, 65535
; O3-NEXT: nor $6, $zero, $5
; O3-NEXT: sra $1, $1, 16
; O3-NEXT: sllv $7, $2, $4
; O3-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1
; O3-NEXT: ll $9, 0($3)
; O3-NEXT: and $10, $9, $5
; O3-NEXT: bne $10, $8, $BB15_3
; O3-NEXT: nop
; O3-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
; O3-NEXT: and $9, $9, $6
; O3-NEXT: or $9, $9, $7
; O3-NEXT: sc $9, 0($3)
; O3-NEXT: beqz $9, $BB15_1
; O3-NEXT: nop
; O3-NEXT: $BB15_3:
; O3-NEXT: srlv $2, $10, $4
; O3-NEXT: sll $2, $2, 16
; O3-NEXT: sra $2, $2, 16
; O3-NEXT: # %bb.4:
; O3-NEXT: sync
; O3-NEXT: xor $1, $2, $1
; O3-NEXT: jr $ra
; O3-NEXT: sltiu $3, $1, 1
;
; MIPS32EB-LABEL: foo:
; MIPS32EB: # %bb.0:
; MIPS32EB-NEXT: addu $1, $5, $6
; MIPS32EB-NEXT: sync
; MIPS32EB-NEXT: addiu $2, $zero, -4
; MIPS32EB-NEXT: and $3, $4, $2
; MIPS32EB-NEXT: andi $2, $4, 3
; MIPS32EB-NEXT: xori $2, $2, 2
; MIPS32EB-NEXT: sll $4, $2, 3
; MIPS32EB-NEXT: ori $2, $zero, 65535
; MIPS32EB-NEXT: sllv $5, $2, $4
; MIPS32EB-NEXT: nor $6, $zero, $5
; MIPS32EB-NEXT: andi $2, $1, 65535
; MIPS32EB-NEXT: sllv $8, $2, $4
; MIPS32EB-NEXT: andi $2, $7, 65535
; MIPS32EB-NEXT: sllv $7, $2, $4
; MIPS32EB-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1
; MIPS32EB-NEXT: ll $9, 0($3)
; MIPS32EB-NEXT: and $10, $9, $5
; MIPS32EB-NEXT: bne $10, $8, $BB15_3
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
; MIPS32EB-NEXT: and $9, $9, $6
; MIPS32EB-NEXT: or $9, $9, $7
; MIPS32EB-NEXT: sc $9, 0($3)
; MIPS32EB-NEXT: beqz $9, $BB15_1
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: $BB15_3:
; MIPS32EB-NEXT: srlv $2, $10, $4
; MIPS32EB-NEXT: sll $2, $2, 16
; MIPS32EB-NEXT: sra $2, $2, 16
; MIPS32EB-NEXT: # %bb.4:
; MIPS32EB-NEXT: sll $1, $1, 16
; MIPS32EB-NEXT: sra $1, $1, 16
; MIPS32EB-NEXT: xor $1, $2, $1
; MIPS32EB-NEXT: sltiu $3, $1, 1
; MIPS32EB-NEXT: sync
; MIPS32EB-NEXT: jr $ra
; MIPS32EB-NEXT: nop
%desired = add i16 %l, %r
%res = cmpxchg i16* %addr, i16 %desired, i16 %new seq_cst seq_cst
ret {i16, i1} %res
}
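; The i16 cmpxchg above has no halfword ll/sc to lower to, so it is expanded to
; a 32-bit ll/sc loop: the pointer is word-aligned, and the halfword is shifted
; and masked within its containing word before the compare and the store.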
@countsint = common global i32 0, align 4
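; CheckSync verifies that the seq_cst atomicrmw below is bracketed by memory
; barriers: a sync must be emitted both before and after the ll/sc retry loop.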
define i32 @CheckSync(i32 signext %v) nounwind noinline {
; MIPS32-LABEL: CheckSync:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-NEXT: addu $1, $2, $25
; MIPS32-NEXT: sync
; MIPS32-NEXT: lw $1, %got(countsint)($1)
; MIPS32-NEXT: $BB16_1: # %entry
; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32-NEXT: ll $2, 0($1)
; MIPS32-NEXT: addu $3, $2, $4
; MIPS32-NEXT: sc $3, 0($1)
; MIPS32-NEXT: beqz $3, $BB16_1
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %entry
; MIPS32-NEXT: sync
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32O0-LABEL: CheckSync:
; MIPS32O0: # %bb.0: # %entry
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: sync
; MIPS32O0-NEXT: lw $1, %got(countsint)($1)
; MIPS32O0-NEXT: $BB16_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32O0-NEXT: ll $2, 0($1)
; MIPS32O0-NEXT: addu $3, $2, $4
; MIPS32O0-NEXT: sc $3, 0($1)
; MIPS32O0-NEXT: beqz $3, $BB16_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: sync
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
; MIPS32R2-LABEL: CheckSync:
; MIPS32R2: # %bb.0: # %entry
; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R2-NEXT: addu $1, $2, $25
; MIPS32R2-NEXT: sync
; MIPS32R2-NEXT: lw $1, %got(countsint)($1)
; MIPS32R2-NEXT: $BB16_1: # %entry
; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R2-NEXT: ll $2, 0($1)
; MIPS32R2-NEXT: addu $3, $2, $4
; MIPS32R2-NEXT: sc $3, 0($1)
; MIPS32R2-NEXT: beqz $3, $BB16_1
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: # %bb.2: # %entry
; MIPS32R2-NEXT: sync
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: nop
;
; MIPS32R6-LABEL: CheckSync:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-NEXT: addu $1, $2, $25
; MIPS32R6-NEXT: sync
; MIPS32R6-NEXT: lw $1, %got(countsint)($1)
; MIPS32R6-NEXT: $BB16_1: # %entry
; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6-NEXT: ll $2, 0($1)
; MIPS32R6-NEXT: addu $3, $2, $4
; MIPS32R6-NEXT: sc $3, 0($1)
; MIPS32R6-NEXT: beqzc $3, $BB16_1
; MIPS32R6-NEXT: # %bb.2: # %entry
; MIPS32R6-NEXT: sync
; MIPS32R6-NEXT: jrc $ra
;
; MIPS32R6O0-LABEL: CheckSync:
; MIPS32R6O0: # %bb.0: # %entry
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: sync
; MIPS32R6O0-NEXT: lw $1, %got(countsint)($1)
; MIPS32R6O0-NEXT: $BB16_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6O0-NEXT: ll $2, 0($1)
; MIPS32R6O0-NEXT: addu $3, $2, $4
; MIPS32R6O0-NEXT: sc $3, 0($1)
; MIPS32R6O0-NEXT: beqzc $3, $BB16_1
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: sync
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: CheckSync:
; MIPS4: # %bb.0: # %entry
; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync)))
; MIPS4-NEXT: daddu $1, $1, $25
; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync)))
; MIPS4-NEXT: sync
; MIPS4-NEXT: ld $1, %got_disp(countsint)($1)
; MIPS4-NEXT: .LBB16_1: # %entry
; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS4-NEXT: ll $2, 0($1)
; MIPS4-NEXT: addu $3, $2, $4
; MIPS4-NEXT: sc $3, 0($1)
; MIPS4-NEXT: beqz $3, .LBB16_1
; MIPS4-NEXT: nop
; MIPS4-NEXT: # %bb.2: # %entry
; MIPS4-NEXT: sync
; MIPS4-NEXT: jr $ra
; MIPS4-NEXT: nop
;
; MIPS64-LABEL: CheckSync:
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync)))
; MIPS64-NEXT: daddu $1, $1, $25
; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync)))
; MIPS64-NEXT: sync
; MIPS64-NEXT: ld $1, %got_disp(countsint)($1)
; MIPS64-NEXT: .LBB16_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64-NEXT: ll $2, 0($1)
; MIPS64-NEXT: addu $3, $2, $4
; MIPS64-NEXT: sc $3, 0($1)
; MIPS64-NEXT: beqz $3, .LBB16_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
; MIPS64-NEXT: sync
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: nop
;
; MIPS64R2-LABEL: CheckSync:
; MIPS64R2: # %bb.0: # %entry
; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync)))
; MIPS64R2-NEXT: daddu $1, $1, $25
; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync)))
; MIPS64R2-NEXT: sync
; MIPS64R2-NEXT: ld $1, %got_disp(countsint)($1)
; MIPS64R2-NEXT: .LBB16_1: # %entry
; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R2-NEXT: ll $2, 0($1)
; MIPS64R2-NEXT: addu $3, $2, $4
; MIPS64R2-NEXT: sc $3, 0($1)
; MIPS64R2-NEXT: beqz $3, .LBB16_1
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: # %bb.2: # %entry
; MIPS64R2-NEXT: sync
; MIPS64R2-NEXT: jr $ra
; MIPS64R2-NEXT: nop
;
; MIPS64R6-LABEL: CheckSync:
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync)))
; MIPS64R6-NEXT: daddu $1, $1, $25
; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync)))
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: ld $1, %got_disp(countsint)($1)
; MIPS64R6-NEXT: .LBB16_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6-NEXT: ll $2, 0($1)
; MIPS64R6-NEXT: addu $3, $2, $4
; MIPS64R6-NEXT: sc $3, 0($1)
; MIPS64R6-NEXT: beqzc $3, .LBB16_1
; MIPS64R6-NEXT: # %bb.2: # %entry
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: jrc $ra
;
; MIPS64R6O0-LABEL: CheckSync:
; MIPS64R6O0: # %bb.0: # %entry
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
; MIPS64R6O0-NEXT: sync
; MIPS64R6O0-NEXT: ld $1, %got_disp(countsint)($1)
; MIPS64R6O0-NEXT: .LBB16_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6O0-NEXT: ll $2, 0($1)
; MIPS64R6O0-NEXT: addu $3, $2, $4
; MIPS64R6O0-NEXT: sc $3, 0($1)
; MIPS64R6O0-NEXT: beqzc $3, .LBB16_1
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: sync
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: CheckSync:
; MM32: # %bb.0: # %entry
; MM32-NEXT: lui $2, %hi(_gp_disp)
; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MM32-NEXT: addu $2, $2, $25
; MM32-NEXT: sync
; MM32-NEXT: lw $1, %got(countsint)($2)
; MM32-NEXT: $BB16_1: # %entry
; MM32-NEXT: # =>This Inner Loop Header: Depth=1
; MM32-NEXT: ll $2, 0($1)
; MM32-NEXT: addu16 $3, $2, $4
; MM32-NEXT: sc $3, 0($1)
; MM32-NEXT: beqzc $3, $BB16_1
; MM32-NEXT: # %bb.2: # %entry
; MM32-NEXT: sync
; MM32-NEXT: jrc $ra
;
; O1-LABEL: CheckSync:
; O1: # %bb.0: # %entry
; O1-NEXT: lui $2, %hi(_gp_disp)
; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
; O1-NEXT: addu $1, $2, $25
; O1-NEXT: sync
; O1-NEXT: lw $1, %got(countsint)($1)
; O1-NEXT: $BB16_1: # %entry
; O1-NEXT: # =>This Inner Loop Header: Depth=1
; O1-NEXT: ll $2, 0($1)
; O1-NEXT: addu $3, $2, $4
; O1-NEXT: sc $3, 0($1)
; O1-NEXT: beqz $3, $BB16_1
; O1-NEXT: nop
; O1-NEXT: # %bb.2: # %entry
; O1-NEXT: sync
; O1-NEXT: jr $ra
; O1-NEXT: nop
;
; O2-LABEL: CheckSync:
; O2: # %bb.0: # %entry
; O2-NEXT: lui $2, %hi(_gp_disp)
; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
; O2-NEXT: addu $1, $2, $25
; O2-NEXT: sync
; O2-NEXT: lw $1, %got(countsint)($1)
; O2-NEXT: $BB16_1: # %entry
; O2-NEXT: # =>This Inner Loop Header: Depth=1
; O2-NEXT: ll $2, 0($1)
; O2-NEXT: addu $3, $2, $4
; O2-NEXT: sc $3, 0($1)
; O2-NEXT: beqz $3, $BB16_1
; O2-NEXT: nop
; O2-NEXT: # %bb.2: # %entry
; O2-NEXT: sync
; O2-NEXT: jr $ra
; O2-NEXT: nop
;
; O3-LABEL: CheckSync:
; O3: # %bb.0: # %entry
; O3-NEXT: lui $2, %hi(_gp_disp)
; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
; O3-NEXT: addu $1, $2, $25
; O3-NEXT: sync
; O3-NEXT: lw $1, %got(countsint)($1)
; O3-NEXT: $BB16_1: # %entry
; O3-NEXT: # =>This Inner Loop Header: Depth=1
; O3-NEXT: ll $2, 0($1)
; O3-NEXT: addu $3, $2, $4
; O3-NEXT: sc $3, 0($1)
; O3-NEXT: beqz $3, $BB16_1
; O3-NEXT: nop
; O3-NEXT: # %bb.2: # %entry
; O3-NEXT: sync
; O3-NEXT: jr $ra
; O3-NEXT: nop
;
; MIPS32EB-LABEL: CheckSync:
; MIPS32EB: # %bb.0: # %entry
; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32EB-NEXT: addu $1, $2, $25
; MIPS32EB-NEXT: sync
; MIPS32EB-NEXT: lw $1, %got(countsint)($1)
; MIPS32EB-NEXT: $BB16_1: # %entry
; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32EB-NEXT: ll $2, 0($1)
; MIPS32EB-NEXT: addu $3, $2, $4
; MIPS32EB-NEXT: sc $3, 0($1)
; MIPS32EB-NEXT: beqz $3, $BB16_1
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: # %bb.2: # %entry
; MIPS32EB-NEXT: sync
; MIPS32EB-NEXT: jr $ra
; MIPS32EB-NEXT: nop
entry:
%0 = atomicrmw add i32* @countsint, i32 %v seq_cst
ret i32 %0
}
; Make sure that this assertion in
; TwoAddressInstructionPass::TryInstructionTransform does not fail:
;
; line 1203: assert(TargetRegisterInfo::isVirtualRegister(regB) &&
;
; It used to fail when MipsDAGToDAGISel::ReplaceUsesWithZeroReg replaced an
; operand of an atomic instruction with register $zero.
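; The value swapped in by the cmpxchg below is the constant 0; an operand like
; this is what was rewritten to $zero and tripped the assertion.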
@a = external global i32
define i32 @zeroreg() nounwind {
; MIPS32-LABEL: zeroreg:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-NEXT: addu $1, $2, $25
; MIPS32-NEXT: sync
; MIPS32-NEXT: addiu $2, $zero, 0
; MIPS32-NEXT: addiu $3, $zero, 1
; MIPS32-NEXT: lw $1, %got(a)($1)
; MIPS32-NEXT: $BB17_1: # %entry
; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32-NEXT: ll $4, 0($1)
; MIPS32-NEXT: bne $4, $3, $BB17_3
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %entry
; MIPS32-NEXT: # in Loop: Header=BB17_1 Depth=1
; MIPS32-NEXT: move $5, $2
; MIPS32-NEXT: sc $5, 0($1)
; MIPS32-NEXT: beqz $5, $BB17_1
; MIPS32-NEXT: nop
; MIPS32-NEXT: $BB17_3: # %entry
; MIPS32-NEXT: xor $1, $4, $3
; MIPS32-NEXT: sltiu $2, $1, 1
; MIPS32-NEXT: sync
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32O0-LABEL: zeroreg:
; MIPS32O0: # %bb.0: # %entry
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: sync
; MIPS32O0-NEXT: lw $1, %got(a)($1)
; MIPS32O0-NEXT: addiu $2, $zero, 0
; MIPS32O0-NEXT: addiu $3, $zero, 1
; MIPS32O0-NEXT: move $4, $3
; MIPS32O0-NEXT: $BB17_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32O0-NEXT: ll $5, 0($1)
; MIPS32O0-NEXT: bne $5, $4, $BB17_3
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: # in Loop: Header=BB17_1 Depth=1
; MIPS32O0-NEXT: move $6, $2
; MIPS32O0-NEXT: sc $6, 0($1)
; MIPS32O0-NEXT: beqz $6, $BB17_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: $BB17_3: # %entry
; MIPS32O0-NEXT: xor $1, $5, $3
; MIPS32O0-NEXT: sltiu $1, $1, 1
; MIPS32O0-NEXT: sync
; MIPS32O0-NEXT: addiu $2, $zero, 1
; MIPS32O0-NEXT: xor $2, $5, $2
; MIPS32O0-NEXT: sltiu $2, $2, 1
; MIPS32O0-NEXT: andi $2, $2, 1
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
; MIPS32R2-LABEL: zeroreg:
; MIPS32R2: # %bb.0: # %entry
; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R2-NEXT: addu $1, $2, $25
; MIPS32R2-NEXT: sync
; MIPS32R2-NEXT: addiu $2, $zero, 0
; MIPS32R2-NEXT: addiu $3, $zero, 1
; MIPS32R2-NEXT: lw $1, %got(a)($1)
; MIPS32R2-NEXT: $BB17_1: # %entry
; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R2-NEXT: ll $4, 0($1)
; MIPS32R2-NEXT: bne $4, $3, $BB17_3
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: # %bb.2: # %entry
; MIPS32R2-NEXT: # in Loop: Header=BB17_1 Depth=1
; MIPS32R2-NEXT: move $5, $2
; MIPS32R2-NEXT: sc $5, 0($1)
; MIPS32R2-NEXT: beqz $5, $BB17_1
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: $BB17_3: # %entry
; MIPS32R2-NEXT: xor $1, $4, $3
; MIPS32R2-NEXT: sltiu $2, $1, 1
; MIPS32R2-NEXT: sync
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: nop
;
; MIPS32R6-LABEL: zeroreg:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-NEXT: addu $1, $2, $25
; MIPS32R6-NEXT: sync
; MIPS32R6-NEXT: addiu $2, $zero, 0
; MIPS32R6-NEXT: addiu $3, $zero, 1
; MIPS32R6-NEXT: lw $1, %got(a)($1)
; MIPS32R6-NEXT: $BB17_1: # %entry
; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6-NEXT: ll $4, 0($1)
; MIPS32R6-NEXT: bnec $4, $3, $BB17_3
; MIPS32R6-NEXT: # %bb.2: # %entry
; MIPS32R6-NEXT: # in Loop: Header=BB17_1 Depth=1
; MIPS32R6-NEXT: move $5, $2
; MIPS32R6-NEXT: sc $5, 0($1)
; MIPS32R6-NEXT: beqzc $5, $BB17_1
; MIPS32R6-NEXT: $BB17_3: # %entry
; MIPS32R6-NEXT: xor $1, $4, $3
; MIPS32R6-NEXT: sltiu $2, $1, 1
; MIPS32R6-NEXT: sync
; MIPS32R6-NEXT: jrc $ra
;
; MIPS32R6O0-LABEL: zeroreg:
; MIPS32R6O0: # %bb.0: # %entry
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: sync
; MIPS32R6O0-NEXT: lw $1, %got(a)($1)
; MIPS32R6O0-NEXT: addiu $2, $zero, 0
; MIPS32R6O0-NEXT: addiu $3, $zero, 1
; MIPS32R6O0-NEXT: move $4, $3
; MIPS32R6O0-NEXT: $BB17_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6O0-NEXT: ll $5, 0($1)
; MIPS32R6O0-NEXT: bnec $5, $4, $BB17_3
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: # in Loop: Header=BB17_1 Depth=1
; MIPS32R6O0-NEXT: move $6, $2
; MIPS32R6O0-NEXT: sc $6, 0($1)
; MIPS32R6O0-NEXT: beqzc $6, $BB17_1
; MIPS32R6O0-NEXT: $BB17_3: # %entry
; MIPS32R6O0-NEXT: xor $1, $5, $3
; MIPS32R6O0-NEXT: sltiu $2, $1, 1
; MIPS32R6O0-NEXT: sync
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: zeroreg:
; MIPS4: # %bb.0: # %entry
; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg)))
; MIPS4-NEXT: daddu $1, $1, $25
; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg)))
; MIPS4-NEXT: sync
; MIPS4-NEXT: addiu $2, $zero, 0
; MIPS4-NEXT: addiu $3, $zero, 1
; MIPS4-NEXT: ld $1, %got_disp(a)($1)
; MIPS4-NEXT: .LBB17_1: # %entry
; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS4-NEXT: ll $4, 0($1)
; MIPS4-NEXT: bne $4, $3, .LBB17_3
; MIPS4-NEXT: nop
; MIPS4-NEXT: # %bb.2: # %entry
; MIPS4-NEXT: # in Loop: Header=BB17_1 Depth=1
; MIPS4-NEXT: move $5, $2
; MIPS4-NEXT: sc $5, 0($1)
; MIPS4-NEXT: beqz $5, .LBB17_1
; MIPS4-NEXT: nop
; MIPS4-NEXT: .LBB17_3: # %entry
; MIPS4-NEXT: xor $1, $4, $3
; MIPS4-NEXT: sltiu $2, $1, 1
; MIPS4-NEXT: sync
; MIPS4-NEXT: jr $ra
; MIPS4-NEXT: nop
;
; MIPS64-LABEL: zeroreg:
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg)))
; MIPS64-NEXT: daddu $1, $1, $25
; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg)))
; MIPS64-NEXT: sync
; MIPS64-NEXT: addiu $2, $zero, 0
; MIPS64-NEXT: addiu $3, $zero, 1
; MIPS64-NEXT: ld $1, %got_disp(a)($1)
; MIPS64-NEXT: .LBB17_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64-NEXT: ll $4, 0($1)
; MIPS64-NEXT: bne $4, $3, .LBB17_3
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
; MIPS64-NEXT: # in Loop: Header=BB17_1 Depth=1
; MIPS64-NEXT: move $5, $2
; MIPS64-NEXT: sc $5, 0($1)
; MIPS64-NEXT: beqz $5, .LBB17_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: .LBB17_3: # %entry
; MIPS64-NEXT: xor $1, $4, $3
; MIPS64-NEXT: sltiu $2, $1, 1
; MIPS64-NEXT: sync
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: nop
;
; MIPS64R2-LABEL: zeroreg:
; MIPS64R2: # %bb.0: # %entry
; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg)))
; MIPS64R2-NEXT: daddu $1, $1, $25
; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg)))
; MIPS64R2-NEXT: sync
; MIPS64R2-NEXT: addiu $2, $zero, 0
; MIPS64R2-NEXT: addiu $3, $zero, 1
; MIPS64R2-NEXT: ld $1, %got_disp(a)($1)
; MIPS64R2-NEXT: .LBB17_1: # %entry
; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R2-NEXT: ll $4, 0($1)
; MIPS64R2-NEXT: bne $4, $3, .LBB17_3
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: # %bb.2: # %entry
; MIPS64R2-NEXT: # in Loop: Header=BB17_1 Depth=1
; MIPS64R2-NEXT: move $5, $2
; MIPS64R2-NEXT: sc $5, 0($1)
; MIPS64R2-NEXT: beqz $5, .LBB17_1
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: .LBB17_3: # %entry
; MIPS64R2-NEXT: xor $1, $4, $3
; MIPS64R2-NEXT: sltiu $2, $1, 1
; MIPS64R2-NEXT: sync
; MIPS64R2-NEXT: jr $ra
; MIPS64R2-NEXT: nop
;
; MIPS64R6-LABEL: zeroreg:
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg)))
; MIPS64R6-NEXT: daddu $1, $1, $25
; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg)))
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: addiu $2, $zero, 0
; MIPS64R6-NEXT: addiu $3, $zero, 1
; MIPS64R6-NEXT: ld $1, %got_disp(a)($1)
; MIPS64R6-NEXT: .LBB17_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6-NEXT: ll $4, 0($1)
; MIPS64R6-NEXT: bnec $4, $3, .LBB17_3
; MIPS64R6-NEXT: # %bb.2: # %entry
; MIPS64R6-NEXT: # in Loop: Header=BB17_1 Depth=1
; MIPS64R6-NEXT: move $5, $2
; MIPS64R6-NEXT: sc $5, 0($1)
; MIPS64R6-NEXT: beqzc $5, .LBB17_1
; MIPS64R6-NEXT: .LBB17_3: # %entry
; MIPS64R6-NEXT: xor $1, $4, $3
; MIPS64R6-NEXT: sltiu $2, $1, 1
; MIPS64R6-NEXT: sync
; MIPS64R6-NEXT: jrc $ra
;
; MIPS64R6O0-LABEL: zeroreg:
; MIPS64R6O0: # %bb.0: # %entry
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg)))
; MIPS64R6O0-NEXT: sync
; MIPS64R6O0-NEXT: ld $1, %got_disp(a)($1)
; MIPS64R6O0-NEXT: addiu $2, $zero, 0
; MIPS64R6O0-NEXT: addiu $3, $zero, 1
; MIPS64R6O0-NEXT: move $4, $3
; MIPS64R6O0-NEXT: .LBB17_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6O0-NEXT: ll $5, 0($1)
; MIPS64R6O0-NEXT: bnec $5, $4, .LBB17_3
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: # in Loop: Header=BB17_1 Depth=1
; MIPS64R6O0-NEXT: move $6, $2
; MIPS64R6O0-NEXT: sc $6, 0($1)
; MIPS64R6O0-NEXT: beqzc $6, .LBB17_1
; MIPS64R6O0-NEXT: .LBB17_3: # %entry
; MIPS64R6O0-NEXT: xor $2, $5, $3
; MIPS64R6O0-NEXT: sltiu $2, $2, 1
; MIPS64R6O0-NEXT: sync
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: zeroreg:
; MM32: # %bb.0: # %entry
; MM32-NEXT: lui $2, %hi(_gp_disp)
; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MM32-NEXT: addu $2, $2, $25
; MM32-NEXT: sync
; MM32-NEXT: li16 $3, 0
; MM32-NEXT: li16 $4, 1
; MM32-NEXT: lw $1, %got(a)($2)
; MM32-NEXT: $BB17_1: # %entry
; MM32-NEXT: # =>This Inner Loop Header: Depth=1
; MM32-NEXT: ll $2, 0($1)
; MM32-NEXT: bne $2, $4, $BB17_3
; MM32-NEXT: nop
; MM32-NEXT: # %bb.2: # %entry
; MM32-NEXT: # in Loop: Header=BB17_1 Depth=1
; MM32-NEXT: move $5, $3
; MM32-NEXT: sc $5, 0($1)
; MM32-NEXT: beqzc $5, $BB17_1
; MM32-NEXT: $BB17_3: # %entry
; MM32-NEXT: xor $1, $2, $4
; MM32-NEXT: sltiu $2, $1, 1
; MM32-NEXT: sync
; MM32-NEXT: jrc $ra
;
; O1-LABEL: zeroreg:
; O1: # %bb.0: # %entry
; O1-NEXT: lui $2, %hi(_gp_disp)
; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
; O1-NEXT: addu $1, $2, $25
; O1-NEXT: sync
; O1-NEXT: addiu $2, $zero, 0
; O1-NEXT: addiu $3, $zero, 1
; O1-NEXT: lw $1, %got(a)($1)
; O1-NEXT: $BB17_1: # %entry
; O1-NEXT: # =>This Inner Loop Header: Depth=1
; O1-NEXT: ll $4, 0($1)
; O1-NEXT: bne $4, $3, $BB17_3
; O1-NEXT: nop
; O1-NEXT: # %bb.2: # %entry
; O1-NEXT: # in Loop: Header=BB17_1 Depth=1
; O1-NEXT: move $5, $2
; O1-NEXT: sc $5, 0($1)
; O1-NEXT: beqz $5, $BB17_1
; O1-NEXT: nop
; O1-NEXT: $BB17_3: # %entry
; O1-NEXT: xor $1, $4, $3
; O1-NEXT: sltiu $2, $1, 1
; O1-NEXT: sync
; O1-NEXT: jr $ra
; O1-NEXT: nop
;
; O2-LABEL: zeroreg:
; O2: # %bb.0: # %entry
; O2-NEXT: lui $2, %hi(_gp_disp)
; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
; O2-NEXT: addu $1, $2, $25
; O2-NEXT: sync
; O2-NEXT: addiu $2, $zero, 0
; O2-NEXT: addiu $3, $zero, 1
; O2-NEXT: lw $1, %got(a)($1)
; O2-NEXT: $BB17_1: # %entry
; O2-NEXT: # =>This Inner Loop Header: Depth=1
; O2-NEXT: ll $4, 0($1)
; O2-NEXT: bne $4, $3, $BB17_3
; O2-NEXT: nop
; O2-NEXT: # %bb.2: # %entry
; O2-NEXT: # in Loop: Header=BB17_1 Depth=1
; O2-NEXT: move $5, $2
; O2-NEXT: sc $5, 0($1)
; O2-NEXT: beqz $5, $BB17_1
; O2-NEXT: nop
; O2-NEXT: $BB17_3: # %entry
; O2-NEXT: xor $1, $4, $3
; O2-NEXT: sltiu $2, $1, 1
; O2-NEXT: sync
; O2-NEXT: jr $ra
; O2-NEXT: nop
;
; O3-LABEL: zeroreg:
; O3: # %bb.0: # %entry
; O3-NEXT: lui $2, %hi(_gp_disp)
; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
; O3-NEXT: addu $1, $2, $25
; O3-NEXT: addiu $2, $zero, 0
; O3-NEXT: addiu $3, $zero, 1
; O3-NEXT: sync
; O3-NEXT: lw $1, %got(a)($1)
; O3-NEXT: $BB17_1: # %entry
; O3-NEXT: # =>This Inner Loop Header: Depth=1
; O3-NEXT: ll $4, 0($1)
; O3-NEXT: bne $4, $3, $BB17_3
; O3-NEXT: nop
; O3-NEXT: # %bb.2: # %entry
; O3-NEXT: # in Loop: Header=BB17_1 Depth=1
; O3-NEXT: move $5, $2
; O3-NEXT: sc $5, 0($1)
; O3-NEXT: beqz $5, $BB17_1
; O3-NEXT: nop
; O3-NEXT: $BB17_3: # %entry
; O3-NEXT: sync
; O3-NEXT: xor $1, $4, $3
; O3-NEXT: jr $ra
; O3-NEXT: sltiu $2, $1, 1
;
; MIPS32EB-LABEL: zeroreg:
; MIPS32EB: # %bb.0: # %entry
; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32EB-NEXT: addu $1, $2, $25
; MIPS32EB-NEXT: sync
; MIPS32EB-NEXT: addiu $2, $zero, 0
; MIPS32EB-NEXT: addiu $3, $zero, 1
; MIPS32EB-NEXT: lw $1, %got(a)($1)
; MIPS32EB-NEXT: $BB17_1: # %entry
; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32EB-NEXT: ll $4, 0($1)
; MIPS32EB-NEXT: bne $4, $3, $BB17_3
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: # %bb.2: # %entry
; MIPS32EB-NEXT: # in Loop: Header=BB17_1 Depth=1
; MIPS32EB-NEXT: move $5, $2
; MIPS32EB-NEXT: sc $5, 0($1)
; MIPS32EB-NEXT: beqz $5, $BB17_1
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: $BB17_3: # %entry
; MIPS32EB-NEXT: xor $1, $4, $3
; MIPS32EB-NEXT: sltiu $2, $1, 1
; MIPS32EB-NEXT: sync
; MIPS32EB-NEXT: jr $ra
; MIPS32EB-NEXT: nop
entry:
%pair0 = cmpxchg i32* @a, i32 1, i32 0 seq_cst seq_cst
%0 = extractvalue { i32, i1 } %pair0, 0
%1 = icmp eq i32 %0, 1
%conv = zext i1 %1 to i32
ret i32 %conv
}
; Check that MIPS32R6 has the correct offset range.
; FIXME: At the moment, we don't seem to do addr+offset for any atomic load/store.
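; R6 ll/sc encode only a 9-bit signed offset, so the 1024-byte displacement
; (i32 index 256) is out of range and is added to the base address with
; addiu/daddiu instead; as the FIXME notes, no target currently folds the
; offset into the atomic memory operand anyway.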
define i32 @AtomicLoadAdd32_OffGt9Bit(i32 signext %incr) nounwind {
; MIPS32-LABEL: AtomicLoadAdd32_OffGt9Bit:
; MIPS32: # %bb.0: # %entry
; MIPS32-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-NEXT: addu $1, $2, $25
; MIPS32-NEXT: lw $1, %got(x)($1)
; MIPS32-NEXT: addiu $1, $1, 1024
; MIPS32-NEXT: $BB18_1: # %entry
; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32-NEXT: ll $2, 0($1)
; MIPS32-NEXT: addu $3, $2, $4
; MIPS32-NEXT: sc $3, 0($1)
; MIPS32-NEXT: beqz $3, $BB18_1
; MIPS32-NEXT: nop
; MIPS32-NEXT: # %bb.2: # %entry
; MIPS32-NEXT: jr $ra
; MIPS32-NEXT: nop
;
; MIPS32O0-LABEL: AtomicLoadAdd32_OffGt9Bit:
; MIPS32O0: # %bb.0: # %entry
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: lw $1, %got(x)($1)
; MIPS32O0-NEXT: addiu $1, $1, 1024
; MIPS32O0-NEXT: $BB18_1: # %entry
; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32O0-NEXT: ll $2, 0($1)
; MIPS32O0-NEXT: addu $3, $2, $4
; MIPS32O0-NEXT: sc $3, 0($1)
; MIPS32O0-NEXT: beqz $3, $BB18_1
; MIPS32O0-NEXT: nop
; MIPS32O0-NEXT: # %bb.2: # %entry
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
; MIPS32R2-LABEL: AtomicLoadAdd32_OffGt9Bit:
; MIPS32R2: # %bb.0: # %entry
; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R2-NEXT: addu $1, $2, $25
; MIPS32R2-NEXT: lw $1, %got(x)($1)
; MIPS32R2-NEXT: addiu $1, $1, 1024
; MIPS32R2-NEXT: $BB18_1: # %entry
; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R2-NEXT: ll $2, 0($1)
; MIPS32R2-NEXT: addu $3, $2, $4
; MIPS32R2-NEXT: sc $3, 0($1)
; MIPS32R2-NEXT: beqz $3, $BB18_1
; MIPS32R2-NEXT: nop
; MIPS32R2-NEXT: # %bb.2: # %entry
; MIPS32R2-NEXT: jr $ra
; MIPS32R2-NEXT: nop
;
; MIPS32R6-LABEL: AtomicLoadAdd32_OffGt9Bit:
; MIPS32R6: # %bb.0: # %entry
; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-NEXT: addu $1, $2, $25
; MIPS32R6-NEXT: lw $1, %got(x)($1)
; MIPS32R6-NEXT: addiu $1, $1, 1024
; MIPS32R6-NEXT: $BB18_1: # %entry
; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6-NEXT: ll $2, 0($1)
; MIPS32R6-NEXT: addu $3, $2, $4
; MIPS32R6-NEXT: sc $3, 0($1)
; MIPS32R6-NEXT: beqzc $3, $BB18_1
; MIPS32R6-NEXT: nop
; MIPS32R6-NEXT: # %bb.2: # %entry
; MIPS32R6-NEXT: jrc $ra
;
; MIPS32R6O0-LABEL: AtomicLoadAdd32_OffGt9Bit:
; MIPS32R6O0: # %bb.0: # %entry
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: lw $1, %got(x)($1)
; MIPS32R6O0-NEXT: addiu $1, $1, 1024
; MIPS32R6O0-NEXT: $BB18_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6O0-NEXT: ll $2, 0($1)
; MIPS32R6O0-NEXT: addu $3, $2, $4
; MIPS32R6O0-NEXT: sc $3, 0($1)
; MIPS32R6O0-NEXT: beqzc $3, $BB18_1
; MIPS32R6O0-NEXT: nop
; MIPS32R6O0-NEXT: # %bb.2: # %entry
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: AtomicLoadAdd32_OffGt9Bit:
; MIPS4: # %bb.0: # %entry
; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit)))
; MIPS4-NEXT: daddu $1, $1, $25
; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit)))
; MIPS4-NEXT: ld $1, %got_disp(x)($1)
; MIPS4-NEXT: daddiu $1, $1, 1024
; MIPS4-NEXT: .LBB18_1: # %entry
; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS4-NEXT: ll $2, 0($1)
; MIPS4-NEXT: addu $3, $2, $4
; MIPS4-NEXT: sc $3, 0($1)
; MIPS4-NEXT: beqz $3, .LBB18_1
; MIPS4-NEXT: nop
; MIPS4-NEXT: # %bb.2: # %entry
; MIPS4-NEXT: jr $ra
; MIPS4-NEXT: nop
;
; MIPS64-LABEL: AtomicLoadAdd32_OffGt9Bit:
; MIPS64: # %bb.0: # %entry
; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit)))
; MIPS64-NEXT: daddu $1, $1, $25
; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit)))
; MIPS64-NEXT: ld $1, %got_disp(x)($1)
; MIPS64-NEXT: daddiu $1, $1, 1024
; MIPS64-NEXT: .LBB18_1: # %entry
; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64-NEXT: ll $2, 0($1)
; MIPS64-NEXT: addu $3, $2, $4
; MIPS64-NEXT: sc $3, 0($1)
; MIPS64-NEXT: beqz $3, .LBB18_1
; MIPS64-NEXT: nop
; MIPS64-NEXT: # %bb.2: # %entry
; MIPS64-NEXT: jr $ra
; MIPS64-NEXT: nop
;
; MIPS64R2-LABEL: AtomicLoadAdd32_OffGt9Bit:
; MIPS64R2: # %bb.0: # %entry
; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit)))
; MIPS64R2-NEXT: daddu $1, $1, $25
; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit)))
; MIPS64R2-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R2-NEXT: daddiu $1, $1, 1024
; MIPS64R2-NEXT: .LBB18_1: # %entry
; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R2-NEXT: ll $2, 0($1)
; MIPS64R2-NEXT: addu $3, $2, $4
; MIPS64R2-NEXT: sc $3, 0($1)
; MIPS64R2-NEXT: beqz $3, .LBB18_1
; MIPS64R2-NEXT: nop
; MIPS64R2-NEXT: # %bb.2: # %entry
; MIPS64R2-NEXT: jr $ra
; MIPS64R2-NEXT: nop
;
; MIPS64R6-LABEL: AtomicLoadAdd32_OffGt9Bit:
; MIPS64R6: # %bb.0: # %entry
; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit)))
; MIPS64R6-NEXT: daddu $1, $1, $25
; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit)))
; MIPS64R6-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6-NEXT: daddiu $1, $1, 1024
; MIPS64R6-NEXT: .LBB18_1: # %entry
; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6-NEXT: ll $2, 0($1)
; MIPS64R6-NEXT: addu $3, $2, $4
; MIPS64R6-NEXT: sc $3, 0($1)
; MIPS64R6-NEXT: beqzc $3, .LBB18_1
; MIPS64R6-NEXT: nop
; MIPS64R6-NEXT: # %bb.2: # %entry
; MIPS64R6-NEXT: jrc $ra
;
; MIPS64R6O0-LABEL: AtomicLoadAdd32_OffGt9Bit:
; MIPS64R6O0: # %bb.0: # %entry
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit)))
; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64
; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: daddiu $1, $1, 1024
; MIPS64R6O0-NEXT: .LBB18_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6O0-NEXT: ll $2, 0($1)
; MIPS64R6O0-NEXT: addu $3, $2, $4
; MIPS64R6O0-NEXT: sc $3, 0($1)
; MIPS64R6O0-NEXT: beqzc $3, .LBB18_1
; MIPS64R6O0-NEXT: nop
; MIPS64R6O0-NEXT: # %bb.2: # %entry
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: AtomicLoadAdd32_OffGt9Bit:
; MM32: # %bb.0: # %entry
; MM32-NEXT: lui $2, %hi(_gp_disp)
; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
; MM32-NEXT: addu $2, $2, $25
; MM32-NEXT: lw $1, %got(x)($2)
; MM32-NEXT: addiu $1, $1, 1024
; MM32-NEXT: $BB18_1: # %entry
; MM32-NEXT: # =>This Inner Loop Header: Depth=1
; MM32-NEXT: ll $2, 0($1)
; MM32-NEXT: addu16 $3, $2, $4
; MM32-NEXT: sc $3, 0($1)
; MM32-NEXT: beqzc $3, $BB18_1
; MM32-NEXT: # %bb.2: # %entry
; MM32-NEXT: jrc $ra
;
; O1-LABEL: AtomicLoadAdd32_OffGt9Bit:
; O1: # %bb.0: # %entry
; O1-NEXT: lui $2, %hi(_gp_disp)
; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
; O1-NEXT: addu $1, $2, $25
; O1-NEXT: lw $1, %got(x)($1)
; O1-NEXT: addiu $1, $1, 1024
; O1-NEXT: $BB18_1: # %entry
; O1-NEXT: # =>This Inner Loop Header: Depth=1
; O1-NEXT: ll $2, 0($1)
; O1-NEXT: addu $3, $2, $4
; O1-NEXT: sc $3, 0($1)
; O1-NEXT: beqz $3, $BB18_1
; O1-NEXT: nop
; O1-NEXT: # %bb.2: # %entry
; O1-NEXT: jr $ra
; O1-NEXT: nop
;
; O2-LABEL: AtomicLoadAdd32_OffGt9Bit:
; O2: # %bb.0: # %entry
; O2-NEXT: lui $2, %hi(_gp_disp)
; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
; O2-NEXT: addu $1, $2, $25
; O2-NEXT: lw $1, %got(x)($1)
; O2-NEXT: addiu $1, $1, 1024
; O2-NEXT: $BB18_1: # %entry
; O2-NEXT: # =>This Inner Loop Header: Depth=1
; O2-NEXT: ll $2, 0($1)
; O2-NEXT: addu $3, $2, $4
; O2-NEXT: sc $3, 0($1)
; O2-NEXT: beqz $3, $BB18_1
; O2-NEXT: nop
; O2-NEXT: # %bb.2: # %entry
; O2-NEXT: jr $ra
; O2-NEXT: nop
;
; O3-LABEL: AtomicLoadAdd32_OffGt9Bit:
; O3: # %bb.0: # %entry
; O3-NEXT: lui $2, %hi(_gp_disp)
; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
; O3-NEXT: addu $1, $2, $25
; O3-NEXT: lw $1, %got(x)($1)
; O3-NEXT: addiu $1, $1, 1024
; O3-NEXT: $BB18_1: # %entry
; O3-NEXT: # =>This Inner Loop Header: Depth=1
; O3-NEXT: ll $2, 0($1)
; O3-NEXT: addu $3, $2, $4
; O3-NEXT: sc $3, 0($1)
; O3-NEXT: beqz $3, $BB18_1
; O3-NEXT: nop
; O3-NEXT: # %bb.2: # %entry
; O3-NEXT: jr $ra
; O3-NEXT: nop
;
; MIPS32EB-LABEL: AtomicLoadAdd32_OffGt9Bit:
; MIPS32EB: # %bb.0: # %entry
; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32EB-NEXT: addu $1, $2, $25
; MIPS32EB-NEXT: lw $1, %got(x)($1)
; MIPS32EB-NEXT: addiu $1, $1, 1024
; MIPS32EB-NEXT: $BB18_1: # %entry
; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32EB-NEXT: ll $2, 0($1)
; MIPS32EB-NEXT: addu $3, $2, $4
; MIPS32EB-NEXT: sc $3, 0($1)
; MIPS32EB-NEXT: beqz $3, $BB18_1
; MIPS32EB-NEXT: nop
; MIPS32EB-NEXT: # %bb.2: # %entry
; MIPS32EB-NEXT: jr $ra
; MIPS32EB-NEXT: nop
entry:
%0 = atomicrmw add i32* getelementptr(i32, i32* @x, i32 256), i32 %incr monotonic
ret i32 %0
}