AArch64: support atomic zext/sextloads

This commit is contained in:
Tim Northover 2021-05-05 13:12:55 +01:00
parent 263a89c9b7
commit b16ddd0375
3 changed files with 153 additions and 2 deletions

View File

@ -2623,8 +2623,23 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// Need special instructions for atomics that affect ordering.
if (Order != AtomicOrdering::NotAtomic &&
Order != AtomicOrdering::Unordered &&
Order != AtomicOrdering::Monotonic)
return false;
Order != AtomicOrdering::Monotonic) {
assert(I.getOpcode() != TargetOpcode::G_ZEXTLOAD);
if (MemSizeInBytes > 64)
return false;
if (I.getOpcode() == TargetOpcode::G_LOAD) {
static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
AArch64::LDARW, AArch64::LDARX};
I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
} else {
static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
AArch64::STLRW, AArch64::STLRX};
I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
}
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
return true;
}
#ifndef NDEBUG
const Register PtrReg = I.getOperand(1).getReg();

View File

@ -251,6 +251,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.widenScalarToNextPow2(0);
getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
.legalForTypesWithMemDesc({{s32, p0, 8, 8},
{s32, p0, 16, 8},
{s32, p0, 32, 8},

View File

@ -1189,4 +1189,139 @@ define void @atomic_store_relaxed_64(i64* %p, i32 %off32, i64 %val) #0 {
ret void
}
; Zero-extending atomic loads: an acquire i8 load and an unordered i16 load
; are each widened to i32 with zext and then summed.
; Expected code in every configuration below: the acquire load becomes ldarb,
; the unordered load stays a plain ldrh, and the byte's zero-extension is
; folded into the add as a uxtb operand. The O0 and O1 expectations differ
; only in which of w8/w9 holds each loaded value.
define i32 @load_zext(i8* %p8, i16* %p16) {
; CHECK-NOLSE-O1-LABEL: load_zext:
; CHECK-NOLSE-O1: ; %bb.0:
; CHECK-NOLSE-O1-NEXT: ldarb w8, [x0]
; CHECK-NOLSE-O1-NEXT: ldrh w9, [x1]
; CHECK-NOLSE-O1-NEXT: add w0, w9, w8, uxtb
; CHECK-NOLSE-O1-NEXT: ret
;
; CHECK-NOLSE-O0-LABEL: load_zext:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: ldarb w9, [x0]
; CHECK-NOLSE-O0-NEXT: ldrh w8, [x1]
; CHECK-NOLSE-O0-NEXT: add w0, w8, w9, uxtb
; CHECK-NOLSE-O0-NEXT: ret
;
; CHECK-LSE-O1-LABEL: load_zext:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldarb w8, [x0]
; CHECK-LSE-O1-NEXT: ldrh w9, [x1]
; CHECK-LSE-O1-NEXT: add w0, w9, w8, uxtb
; CHECK-LSE-O1-NEXT: ret
;
; CHECK-LSE-O0-LABEL: load_zext:
; CHECK-LSE-O0: ; %bb.0:
; CHECK-LSE-O0-NEXT: ldarb w9, [x0]
; CHECK-LSE-O0-NEXT: ldrh w8, [x1]
; CHECK-LSE-O0-NEXT: add w0, w8, w9, uxtb
; CHECK-LSE-O0-NEXT: ret
; Acquire ordering on the byte load: this is the load expected to use ldarb.
%val1.8 = load atomic i8, i8* %p8 acquire, align 1
%val1 = zext i8 %val1.8 to i32
; Unordered halfword load: expected to remain an ordinary ldrh.
%val2.16 = load atomic i16, i16* %p16 unordered, align 2
%val2 = zext i16 %val2.16 to i32
%res = add i32 %val1, %val2
ret i32 %res
}
; Full-width ordered atomic loads: a seq_cst i32 load and an acquire i64 load,
; returned together as a { i32, i64 } aggregate.
; Both loads are expected to be emitted as ldar (w-register form for the i32,
; x-register form for the i64), writing directly into w0 and x1. The expected
; sequence is identical across all checked configurations.
define { i32, i64 } @load_acq(i32* %p32, i64* %p64) {
; CHECK-NOLSE-LABEL: load_acq:
; CHECK-NOLSE: ; %bb.0:
; CHECK-NOLSE-NEXT: ldar w0, [x0]
; CHECK-NOLSE-NEXT: ldar x1, [x1]
; CHECK-NOLSE-NEXT: ret
;
; CHECK-LSE-O1-LABEL: load_acq:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldar w0, [x0]
; CHECK-LSE-O1-NEXT: ldar x1, [x1]
; CHECK-LSE-O1-NEXT: ret
;
; CHECK-LSE-O0-LABEL: load_acq:
; CHECK-LSE-O0: ; %bb.0:
; CHECK-LSE-O0-NEXT: ldar w0, [x0]
; CHECK-LSE-O0-NEXT: ldar x1, [x1]
; CHECK-LSE-O0-NEXT: ret
; seq_cst 32-bit load; the expectations above use the same ldar as for acquire.
%val32 = load atomic i32, i32* %p32 seq_cst, align 4
%tmp = insertvalue { i32, i64 } undef, i32 %val32, 0
; acquire 64-bit load.
%val64 = load atomic i64, i64* %p64 acquire, align 8
%res = insertvalue { i32, i64 } %tmp, i64 %val64, 1
ret { i32, i64 } %res
}
; Sign-extending atomic loads: an acquire i8 load and an unordered i16 load
; are each widened to i32 with sext and then summed.
; Expected code in every configuration below: the acquire load becomes ldarb
; with its sign-extension folded into the add as an sxtb operand, while the
; unordered load is a plain ldrh followed by a separate sxth (no sign-extending
; load instruction is expected). The O0 and O1 expectations differ only in
; which of w8/w9 holds each loaded value.
define i32 @load_sext(i8* %p8, i16* %p16) {
; CHECK-NOLSE-O1-LABEL: load_sext:
; CHECK-NOLSE-O1: ; %bb.0:
; CHECK-NOLSE-O1-NEXT: ldarb w8, [x0]
; CHECK-NOLSE-O1-NEXT: ldrh w9, [x1]
; CHECK-NOLSE-O1-NEXT: sxth w9, w9
; CHECK-NOLSE-O1-NEXT: add w0, w9, w8, sxtb
; CHECK-NOLSE-O1-NEXT: ret
;
; CHECK-NOLSE-O0-LABEL: load_sext:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: ldarb w9, [x0]
; CHECK-NOLSE-O0-NEXT: ldrh w8, [x1]
; CHECK-NOLSE-O0-NEXT: sxth w8, w8
; CHECK-NOLSE-O0-NEXT: add w0, w8, w9, sxtb
; CHECK-NOLSE-O0-NEXT: ret
;
; CHECK-LSE-O1-LABEL: load_sext:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: ldarb w8, [x0]
; CHECK-LSE-O1-NEXT: ldrh w9, [x1]
; CHECK-LSE-O1-NEXT: sxth w9, w9
; CHECK-LSE-O1-NEXT: add w0, w9, w8, sxtb
; CHECK-LSE-O1-NEXT: ret
;
; CHECK-LSE-O0-LABEL: load_sext:
; CHECK-LSE-O0: ; %bb.0:
; CHECK-LSE-O0-NEXT: ldarb w9, [x0]
; CHECK-LSE-O0-NEXT: ldrh w8, [x1]
; CHECK-LSE-O0-NEXT: sxth w8, w8
; CHECK-LSE-O0-NEXT: add w0, w8, w9, sxtb
; CHECK-LSE-O0-NEXT: ret
; Acquire ordering on the byte load: this is the load expected to use ldarb.
%val1.8 = load atomic i8, i8* %p8 acquire, align 1
%val1 = sext i8 %val1.8 to i32
; Unordered halfword load: expected to remain an ordinary ldrh plus sxth.
%val2.16 = load atomic i16, i16* %p16 unordered, align 2
%val2 = sext i16 %val2.16 to i32
%res = add i32 %val1, %val2
ret i32 %res
}
; Truncating atomic stores: the i32 argument is truncated to i8 and stored
; seq_cst, then truncated to i16 and stored monotonic.
; Expected code in every configuration below: the seq_cst store becomes stlrb,
; the monotonic store stays a plain strh, and both store w0 directly, so no
; separate instruction is expected for either trunc.
define void @store_trunc(i32 %val, i8* %p8, i16* %p16) {
; CHECK-NOLSE-LABEL: store_trunc:
; CHECK-NOLSE: ; %bb.0:
; CHECK-NOLSE-NEXT: stlrb w0, [x1]
; CHECK-NOLSE-NEXT: strh w0, [x2]
; CHECK-NOLSE-NEXT: ret
;
; CHECK-LSE-O1-LABEL: store_trunc:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: stlrb w0, [x1]
; CHECK-LSE-O1-NEXT: strh w0, [x2]
; CHECK-LSE-O1-NEXT: ret
;
; CHECK-LSE-O0-LABEL: store_trunc:
; CHECK-LSE-O0: ; %bb.0:
; CHECK-LSE-O0-NEXT: stlrb w0, [x1]
; CHECK-LSE-O0-NEXT: strh w0, [x2]
; CHECK-LSE-O0-NEXT: ret
%val8 = trunc i32 %val to i8
; seq_cst byte store: this is the store expected to use stlrb.
store atomic i8 %val8, i8* %p8 seq_cst, align 1
%val16 = trunc i32 %val to i16
; monotonic halfword store: expected to remain an ordinary strh.
store atomic i16 %val16, i16* %p16 monotonic, align 2
ret void
}
attributes #0 = { nounwind }