From aa8b9993c23f9915f1ba694502333f67a627e8d0 Mon Sep 17 00:00:00 2001
From: Jessica Paquette
Date: Fri, 26 Jul 2019 23:28:53 +0000
Subject: [PATCH] [AArch64][GlobalISel] Select @llvm.aarch64.stlxr for 32-bit pointers

Add partial instruction selection for intrinsics like this:

```
declare i32 @llvm.aarch64.stlxr(i64, i32*)
```

(This only handles the case where a G_ZEXT is feeding the intrinsic.)

Also make sure that the added store instruction actually carries the memory
operand from the original intrinsic instruction.

Update select-stlxr-intrin.mir and arm64-ldxr-stxr.ll.

Differential Revision: https://reviews.llvm.org/D65355

llvm-svn: 367163
---
 .../AArch64/AArch64InstructionSelector.cpp    | 24 +++++++++++++--
 .../GlobalISel/select-stlxr-intrin.mir        | 30 ++++++++++++++++++-
 llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll  |  5 ++++
 3 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 2a45ee662ef7..4f2b4dc7f468 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -3892,7 +3892,9 @@ static unsigned findIntrinsicID(MachineInstr &I) {
 /// intrinsic.
 static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
   switch (NumBytesToStore) {
-  // TODO: 1, 2, and 4 byte stores.
+  // TODO: 1 and 2 byte stores
+  case 4:
+    return AArch64::STLXRW;
   case 8:
     return AArch64::STLXRX;
   default:
@@ -3946,8 +3948,24 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
     unsigned Opc = getStlxrOpcode(NumBytesToStore);
     if (!Opc)
       return false;
-
-    auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg});
+    unsigned NumBitsToStore = NumBytesToStore * 8;
+    if (NumBitsToStore != 64) {
+      // The intrinsic always has a 64-bit source, but we might actually want
+      // a differently-sized source for the instruction. Try to get it.
+      // TODO: For 1 and 2-byte stores, this will have a G_AND. For now, let's
+      // just handle 4-byte stores.
+      // TODO: If we don't find a G_ZEXT, we'll have to truncate the value down
+      // to the right size for the STLXR.
+      MachineInstr *Zext = getOpcodeDef(TargetOpcode::G_ZEXT, SrcReg, MRI);
+      if (!Zext)
+        return false;
+      SrcReg = Zext->getOperand(1).getReg();
+      // We should get an appropriately-sized register here.
+      if (RBI.getSizeInBits(SrcReg, MRI, TRI) != NumBitsToStore)
+        return false;
+    }
+    auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg})
+                       .addMemOperand(*I.memoperands_begin());
     constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
   }
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-stlxr-intrin.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-stlxr-intrin.mir
index dd775a20b38e..d60015d5d98e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-stlxr-intrin.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-stlxr-intrin.mir
@@ -5,6 +5,10 @@
   define i32 @test_store_release_i64(i32 %a, i64* %addr) {
     ret i32 %a
   }
+
+  define i32 @test_store_release_i32(i32 %a, i64* %addr) {
+    ret i32 %a
+  }
 ...
 ---
 name: test_store_release_i64
@@ -20,7 +24,7 @@ body: |
     ; CHECK: liveins: $w0, $x1, $x2
     ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
     ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x2
-    ; CHECK: early-clobber %2:gpr32 = STLXRX [[COPY]], [[COPY1]]
+    ; CHECK: early-clobber %2:gpr32 = STLXRX [[COPY]], [[COPY1]] :: (volatile store 8 into %ir.addr)
     ; CHECK: $w0 = COPY %2
     ; CHECK: RET_ReallyLR implicit $w0
     %1:gpr(s64) = COPY $x1
@@ -30,3 +34,27 @@ body: |
     RET_ReallyLR implicit $w0
 
 ...
+---
+name: test_store_release_i32
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1, $x2
+    ; CHECK-LABEL: name: test_store_release_i32
+    ; CHECK: liveins: $w0, $w1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x2
+    ; CHECK: early-clobber %3:gpr32 = STLXRW [[COPY]], [[COPY1]] :: (volatile store 4 into %ir.addr)
+    ; CHECK: $w0 = COPY %3
+    ; CHECK: RET_ReallyLR implicit $w0
+    %1:gpr(s32) = COPY $w1
+    %2:gpr(p0) = COPY $x2
+    %3:gpr(s64) = G_ZEXT %1(s32)
+    %4:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stlxr), %3(s64), %2(p0) :: (volatile store 4 into %ir.addr)
+    $w0 = COPY %4(s32)
+    RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll b/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
index 4f87121c6ae4..fffcbe9fbf8a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
@@ -248,11 +248,16 @@ define i32 @test_store_release_i16(i32, i16 %val, i16* %addr) {
   ret i32 %res
 }
 
+; FALLBACK-NOT: remark:{{.*}}test_store_release_i32
 define i32 @test_store_release_i32(i32, i32 %val, i32* %addr) {
 ; CHECK-LABEL: test_store_release_i32:
 ; CHECK-NOT: uxtw
 ; CHECK-NOT: and
 ; CHECK: stlxr w0, w1, [x2]
+; GISEL-LABEL: test_store_release_i32:
+; GISEL-NOT: uxtw
+; GISEL-NOT: and
+; GISEL: stlxr w0, w1, [x2]
   %extval = zext i32 %val to i64
   %res = call i32 @llvm.aarch64.stlxr.p0i32(i64 %extval, i32* %addr)
   ret i32 %res
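
For readers skimming the patch, here is a minimal IR sketch of the pattern this enables, adapted from test_store_release_i32 in arm64-ldxr-stxr.ll (the function name below is illustrative and not part of the patch): a 32-bit value is zero-extended to the intrinsic's i64 operand, and GlobalISel should now select the call directly to `stlxr w0, w1, [x2]` rather than falling back, which is what the FALLBACK-NOT and GISEL checks above verify.

```
; Illustrative only; mirrors test_store_release_i32 from the patched test.
declare i32 @llvm.aarch64.stlxr.p0i32(i64, i32*)

define i32 @store_release_word(i32 %val, i32* %addr) {
  ; The intrinsic takes an i64 source, so the 32-bit value is zero-extended
  ; first; the selector looks through this G_ZEXT and emits STLXRW.
  %ext = zext i32 %val to i64
  %res = call i32 @llvm.aarch64.stlxr.p0i32(i64 %ext, i32* %addr)
  ret i32 %res
}
```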