From 0ae6006bee0e77bacce41e5a0c661ad67ea1073d Mon Sep 17 00:00:00 2001 From: Volkan Keles Date: Thu, 15 Aug 2019 23:45:45 +0000 Subject: [PATCH] [GlobalISel] CSEMIRBuilder: Add support for G_GEP Summary: This patch adds G_GEP to `shouldCSEOpc` so that it can be CSEd. It also refactors `translateGetElementPtr` by replacing `createGenericVirtualRegister` calls with types. Reviewers: aditya_nandakumar, arsenm, dsanders, paquette, aemerson Reviewed By: aditya_nandakumar Subscribers: wdng, rovka, javed.absar, hiraditya, Petar.Avramovic, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D66316 llvm-svn: 369070 --- llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp | 1 + llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 21 +++----- .../CodeGen/GlobalISel/MachineIRBuilder.cpp | 6 +-- .../GlobalISel/arm64-irtranslator-gep.ll | 51 +++++++++++++++++++ .../ARM/GlobalISel/arm-irtranslator.ll | 4 +- .../GlobalISel/arm-legalize-load-store.mir | 16 +++--- 6 files changed, 70 insertions(+), 29 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp index 9f81b0af4832..7d9d812d34bc 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -52,6 +52,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) { case TargetOpcode::G_ANYEXT: case TargetOpcode::G_UNMERGE_VALUES: case TargetOpcode::G_TRUNC: + case TargetOpcode::G_GEP: return true; } return false; diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 81ea7b539be0..e1d0dc718b70 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1076,36 +1076,29 @@ bool IRTranslator::translateGetElementPtr(const User &U, } if (Offset != 0) { - Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset); - MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetMIB.getReg(0)); - - BaseReg = NewBaseReg; + BaseReg = + MIRBuilder.buildGEP(PtrTy, BaseReg, OffsetMIB.getReg(0)).getReg(0); Offset = 0; } Register IdxReg = getOrCreateVReg(*Idx); - if (MRI->getType(IdxReg) != OffsetTy) { - Register NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy); - MIRBuilder.buildSExtOrTrunc(NewIdxReg, IdxReg); - IdxReg = NewIdxReg; - } + if (MRI->getType(IdxReg) != OffsetTy) + IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0); // N = N + Idx * ElementSize; // Avoid doing it for ElementSize of 1. Register GepOffsetReg; if (ElementSize != 1) { - GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy); auto ElementSizeMIB = MIRBuilder.buildConstant( getLLTForType(*OffsetIRTy, *DL), ElementSize); - MIRBuilder.buildMul(GepOffsetReg, ElementSizeMIB.getReg(0), IdxReg); + GepOffsetReg = + MIRBuilder.buildMul(OffsetTy, ElementSizeMIB, IdxReg).getReg(0); } else GepOffsetReg = IdxReg; - Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); - MIRBuilder.buildGEP(NewBaseReg, BaseReg, GepOffsetReg); - BaseReg = NewBaseReg; + BaseReg = MIRBuilder.buildGEP(PtrTy, BaseReg, GepOffsetReg).getReg(0); } } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 7678110d5473..0ec3c9ae9b8d 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -207,11 +207,7 @@ MachineInstrBuilder MachineIRBuilder::buildGEP(const DstOp &Res, Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch"); assert(Op1.getLLTTy(*getMRI()).isScalar() && "invalid offset type"); - auto MIB = buildInstr(TargetOpcode::G_GEP); - Res.addDefToMIB(*getMRI(), MIB); - Op0.addSrcToMIB(MIB); - Op1.addSrcToMIB(MIB); - return MIB; + return buildInstr(TargetOpcode::G_GEP, {Res}, {Op0, Op1}); } Optional diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll new file mode 100644 index 000000000000..5a4f01c0fd67 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefix=O0 +; RUN: llc -O3 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefix=O3 +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--" + +define i32 @cse_gep([4 x i32]* %ptr, i32 %idx) { + ; O0-LABEL: name: cse_gep + ; O0: bb.1 (%ir-block.0): + ; O0: liveins: $w1, $x0 + ; O0: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; O0: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; O0: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32) + ; O0: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; O0: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[C]], [[SEXT]] + ; O0: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[MUL]](s64) + ; O0: [[COPY2:%[0-9]+]]:_(p0) = COPY [[GEP]](p0) + ; O0: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load 4 from %ir.gep1) + ; O0: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[C]], [[SEXT]] + ; O0: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[MUL1]](s64) + ; O0: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; O0: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[GEP1]], [[C1]](s64) + ; O0: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.gep2) + ; O0: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[LOAD1]] + ; O0: $w0 = COPY [[ADD]](s32) + ; O0: RET_ReallyLR implicit $w0 + ; O3-LABEL: name: cse_gep + ; O3: bb.1 (%ir-block.0): + ; O3: liveins: $w1, $x0 + ; O3: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; O3: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; O3: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32) + ; O3: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; O3: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[C]], [[SEXT]] + ; O3: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[MUL]](s64) + ; O3: [[COPY2:%[0-9]+]]:_(p0) = COPY [[GEP]](p0) + ; O3: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load 4 from %ir.gep1) + ; O3: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; O3: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[GEP]], [[C1]](s64) + ; O3: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 4 from %ir.gep2) + ; O3: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[LOAD1]] + ; O3: $w0 = COPY [[ADD]](s32) + ; O3: RET_ReallyLR implicit $w0 + %sidx = sext i32 %idx to i64 + %gep1 = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i64 %sidx, i64 0 + %v1 = load i32, i32* %gep1 + %gep2 = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i64 %sidx, i64 1 + %v2 = load i32, i32* %gep2 + %res = add i32 %v2, %v2 + ret i32 %res +} diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll index 616299c3c094..b4205c1fcc8e 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll @@ -560,8 +560,8 @@ define void @test_load_store_struct({i32, i32} *%addr) { ; CHECK-DAG: [[ADDR2:%[0-9]+]]:_(p0) = G_GEP [[ADDR1]], [[OFFSET]](s32) ; CHECK-DAG: [[VAL2:%[0-9]+]]:_(s32) = G_LOAD [[ADDR2]](p0) :: (load 4 from %ir.addr + 4) ; CHECK-DAG: G_STORE [[VAL1]](s32), [[ADDR1]](p0) :: (store 4 into %ir.addr) -; CHECK-DAG: [[ADDR2:%[0-9]+]]:_(p0) = G_GEP [[ADDR1]], [[OFFSET]](s32) -; CHECK-DAG: G_STORE [[VAL2]](s32), [[ADDR2]](p0) :: (store 4 into %ir.addr + 4) +; CHECK-DAG: [[ADDR3:%[0-9]+]]:_(p0) = COPY [[ADDR2]] +; CHECK-DAG: G_STORE [[VAL2]](s32), [[ADDR3]](p0) :: (store 4 into %ir.addr + 4) %val = load {i32, i32}, {i32, i32} *%addr, align 4 store {i32, i32} %val, {i32, i32} *%addr, align 4 ret void diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir index 63137071ed68..5b78cd5b6fba 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir @@ -109,9 +109,9 @@ body: | ; CHECK-NEXT: [[V1:%[0-9]+]]:_(s32) = G_LOAD [[ADDR1]](p0) :: (load 4, align 1) ; CHECK-NEXT: [[OFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[ADDR2:%[0-9]+]]:_(p0) = G_GEP [[ADDR1]], [[OFF]] - ; CHECK-NEXT: [[V2:%[0-9]+]]:_(s32) = G_LOAD [[ADDR2]](p0) :: (load 4, align 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY [[ADDR2]] + ; CHECK-NEXT: [[V2:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4, align 1) ; CHECK-NEXT: G_STORE [[V1]](s32), [[ADDR1]](p0) :: (store 4, align 1) - ; CHECK-NEXT: [[ADDR2:%[0-9]+]]:_(p0) = G_GEP [[ADDR1]], [[OFF]] ; CHECK-NEXT: G_STORE [[V2]](s32), [[ADDR2]](p0) :: (store 4, align 1) %0(p0) = COPY $r0 %1(s64) = G_LOAD %0(p0) :: (load 8, align 1) @@ -146,19 +146,19 @@ body: | ; CHECK-NEXT: [[V1:%[0-9]+]]:_(s32) = G_LOAD [[ADDR1]](p0) :: (load 4, align 1) ; CHECK-NEXT: [[OFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[ADDR2:%[0-9]+]]:_(p0) = G_GEP [[ADDR1]], [[OFF]] - ; CHECK-NEXT: [[V2:%[0-9]+]]:_(s32) = G_LOAD [[ADDR2]](p0) :: (load 4, align 1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY [[ADDR2]] + ; CHECK-NEXT: [[V2:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4, align 1) ; CHECK-NEXT: G_STORE [[V1]](s32), [[ADDR1]](p0) :: (store 4, align 1) - ; CHECK-NEXT: [[ADDR2:%[0-9]+]]:_(p0) = G_GEP [[ADDR1]], [[OFF]] - ; CHECK-NEXT: G_STORE [[V2]](s32), [[ADDR2]](p0) :: (store 4, align 1) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[ADDR2]] + ; CHECK-NEXT: G_STORE [[V2]](s32), [[COPY2]](p0) :: (store 4, align 1) %0(p0) = COPY $r0 %1(s64) = G_LOAD %0(p0) :: (load 8, align 1) G_STORE %1(s64), %0(p0) :: (store 8, align 1) ; CHECK: [[V1:%[0-9]+]]:_(s32) = G_LOAD [[ADDR1]](p0) :: (load 4) - ; CHECK-NEXT: [[ADDR2:%[0-9]+]]:_(p0) = G_GEP [[ADDR1]], [[OFF]] - ; CHECK-NEXT: [[V2:%[0-9]+]]:_(s32) = G_LOAD [[ADDR2]](p0) :: (load 4) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY [[ADDR2]] + ; CHECK-NEXT: [[V2:%[0-9]+]]:_(s32) = G_LOAD [[COPY3]](p0) :: (load 4) ; CHECK-NEXT: G_STORE [[V1]](s32), [[ADDR1]](p0) :: (store 4) - ; CHECK-NEXT: [[ADDR2:%[0-9]+]]:_(p0) = G_GEP [[ADDR1]], [[OFF]] ; CHECK-NEXT: G_STORE [[V2]](s32), [[ADDR2]](p0) :: (store 4) %2(s64) = G_LOAD %0(p0) :: (load 8, align 4) G_STORE %2(s64), %0(p0) :: (store 8, align 4)