[AArch64][GlobalISel] Fix atomic truncating stores generating invalid copies.

If the source reg is a 64b vreg, then we need to emit a subreg copy to a 32b
gpr before we select sub-64b variants like STLRW.
This commit is contained in:
Amara Emerson 2021-11-09 20:41:26 -08:00
parent d71bb6a409
commit af4dc633f8
2 changed files with 158 additions and 0 deletions

View File

@ -2770,6 +2770,14 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
} else {
static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
AArch64::STLRW, AArch64::STLRX};
Register ValReg = LdSt.getReg(0);
if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
// Emit a subreg copy of 32 bits.
Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
.addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
I.getOperand(0).setReg(NewVal);
}
I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
}
constrainSelectedInstRegOperands(I, TII, TRI, RBI);

View File

@ -0,0 +1,150 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=1 %s -o - | FileCheck %s
---
# Input: a release G_STORE with an (s32) memory operand whose value operand is
# an s64 G_CONSTANT. The CHECK lines expect the store to be selected as STLRW,
# with the stored value routed through a gpr32 COPY of the sub_32 subregister
# of the 64-bit constant.
name: truncstore_atomic_32
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
- { reg: '$w1' }
body: |
; CHECK-LABEL: name: truncstore_atomic_32
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
; CHECK-NEXT: TBNZW [[COPY1]], 0, %bb.2
; CHECK-NEXT: B %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 4
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG]].sub_32
; CHECK-NEXT: STLRW [[COPY2]], [[COPY]] :: (store release (s32))
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: RET_ReallyLR
bb.1:
liveins: $w1, $x0
%0:gpr(p0) = COPY $x0
%3:gpr(s32) = COPY $w1
%2:gpr(s8) = G_TRUNC %3(s32)
%4:gpr(s8) = G_ASSERT_ZEXT %2, 1
%1:gpr(s1) = G_TRUNC %4(s8)
G_BRCOND %1(s1), %bb.3
G_BR %bb.2
bb.2:
%8:gpr(s64) = G_CONSTANT i64 4
G_STORE %8(s64), %0(p0) :: (store release (s32))
bb.3:
RET_ReallyLR
...
---
# Input: a release G_STORE with an (s16) memory operand whose value operand is
# an s64 G_CONSTANT. The CHECK lines expect the store to be selected as STLRH,
# with the stored value routed through a gpr32 COPY of the sub_32 subregister
# of the 64-bit constant.
name: truncstore_atomic_16
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
- { reg: '$w1' }
body: |
; CHECK-LABEL: name: truncstore_atomic_16
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
; CHECK-NEXT: TBNZW [[COPY1]], 0, %bb.2
; CHECK-NEXT: B %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 4
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG]].sub_32
; CHECK-NEXT: STLRH [[COPY2]], [[COPY]] :: (store release (s16))
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: RET_ReallyLR
bb.1:
liveins: $w1, $x0
%0:gpr(p0) = COPY $x0
%3:gpr(s32) = COPY $w1
%2:gpr(s8) = G_TRUNC %3(s32)
%4:gpr(s8) = G_ASSERT_ZEXT %2, 1
%1:gpr(s1) = G_TRUNC %4(s8)
G_BRCOND %1(s1), %bb.3
G_BR %bb.2
bb.2:
%8:gpr(s64) = G_CONSTANT i64 4
G_STORE %8(s64), %0(p0) :: (store release (s16))
bb.3:
RET_ReallyLR
...
---
# Input: a release G_STORE with an (s8) memory operand whose value operand is
# an s64 G_CONSTANT. The CHECK lines expect the store to be selected as STLRB,
# with the stored value routed through a gpr32 COPY of the sub_32 subregister
# of the 64-bit constant.
name: truncstore_atomic_8
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
- { reg: '$w1' }
body: |
; CHECK-LABEL: name: truncstore_atomic_8
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
; CHECK-NEXT: TBNZW [[COPY1]], 0, %bb.2
; CHECK-NEXT: B %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 4
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG]].sub_32
; CHECK-NEXT: STLRB [[COPY2]], [[COPY]] :: (store release (s8))
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: RET_ReallyLR
bb.1:
liveins: $w1, $x0
%0:gpr(p0) = COPY $x0
%3:gpr(s32) = COPY $w1
%2:gpr(s8) = G_TRUNC %3(s32)
%4:gpr(s8) = G_ASSERT_ZEXT %2, 1
%1:gpr(s1) = G_TRUNC %4(s8)
G_BRCOND %1(s1), %bb.3
G_BR %bb.2
bb.2:
%8:gpr(s64) = G_CONSTANT i64 4
G_STORE %8(s64), %0(p0) :: (store release (s8))
bb.3:
RET_ReallyLR
...