From 70cdb5b3914803ca89a96a867a47936d049a4b32 Mon Sep 17 00:00:00 2001 From: John Brawn Date: Fri, 24 Nov 2017 14:10:45 +0000 Subject: [PATCH] [CGP] Make optimizeMemoryInst able to combine more kinds of ExtAddrMode fields This patch extends the recent work in optimizeMemoryInst to make it able to combine more ExtAddrMode fields than just the BaseReg. This fixes some benchmark regressions introduced by r309397, where GVN PRE is hoisting a getelementptr such that it can no longer be combined into the addressing mode of the load or store that uses it. Differential Revision: https://reviews.llvm.org/D38133 llvm-svn: 318949 --- llvm/lib/CodeGen/CodeGenPrepare.cpp | 106 +++++- .../CodeGenPrepare/ARM/sink-addrmode.ll | 352 +++++++++++++++++- 2 files changed, 445 insertions(+), 13 deletions(-) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index e60e1e2d2943..cacabba5f267 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -202,6 +202,22 @@ static cl::opt AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(false), cl::desc("Allow creation of selects in Address sinking.")); +static cl::opt AddrSinkCombineBaseReg( + "addr-sink-combine-base-reg", cl::Hidden, cl::init(true), + cl::desc("Allow combining of BaseReg field in Address sinking.")); + +static cl::opt AddrSinkCombineBaseGV( + "addr-sink-combine-base-gv", cl::Hidden, cl::init(true), + cl::desc("Allow combining of BaseGV field in Address sinking.")); + +static cl::opt AddrSinkCombineBaseOffs( + "addr-sink-combine-base-offs", cl::Hidden, cl::init(true), + cl::desc("Allow combining of BaseOffs field in Address sinking.")); + +static cl::opt AddrSinkCombineScaledReg( + "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), + cl::desc("Allow combining of ScaledReg field in Address sinking.")); + namespace { using SetOfInstrs = SmallPtrSet; @@ -2073,6 +2089,59 @@ struct ExtAddrMode : public TargetLowering::AddrMode { return false; } + + Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) { + switch (Field) { + default: + return nullptr; + case BaseRegField: + return BaseReg; + case BaseGVField: + return BaseGV; + case ScaledRegField: + return ScaledReg; + case BaseOffsField: + return ConstantInt::get(IntPtrTy, BaseOffs); + } + } + + void SetCombinedField(FieldName Field, Value *V, + const SmallVectorImpl &AddrModes) { + switch (Field) { + default: + llvm_unreachable("Unhandled fields are expected to be rejected earlier"); + break; + case ExtAddrMode::BaseRegField: + BaseReg = V; + break; + case ExtAddrMode::BaseGVField: + // A combined BaseGV is an Instruction, not a GlobalValue, so it goes + // in the BaseReg field. + assert(BaseReg == nullptr); + BaseReg = V; + BaseGV = nullptr; + break; + case ExtAddrMode::ScaledRegField: + ScaledReg = V; + // If we have a mix of scaled and unscaled addrmodes then we want scale + // to be the scale and not zero. + if (!Scale) + for (const ExtAddrMode &AM : AddrModes) + if (AM.Scale) { + Scale = AM.Scale; + break; + } + break; + case ExtAddrMode::BaseOffsField: + // The offset is no longer a constant, so it goes in ScaledReg with a + // scale of 1. + assert(ScaledReg == nullptr); + ScaledReg = V; + Scale = 1; + BaseOffs = 0; + break; + } + } }; } // end anonymous namespace @@ -2800,12 +2869,14 @@ public: else if (DifferentField != ThisDifferentField) DifferentField = ExtAddrMode::MultipleFields; - // If NewAddrMode differs in only one dimension then we can handle it by + // If NewAddrMode differs in only one dimension, and that dimension isn't + // the amount that ScaledReg is scaled by, then we can handle it by // inserting a phi/select later on. Even if NewAddMode is the same // we still need to collect it due to original value is different. // And later we will need all original values as anchors during // finding the common Phi node. - if (DifferentField != ExtAddrMode::MultipleFields) { + if (DifferentField != ExtAddrMode::MultipleFields && + DifferentField != ExtAddrMode::ScaleField) { AddrModes.emplace_back(NewAddrMode); return true; } @@ -2833,12 +2904,7 @@ public: if (AllAddrModesTrivial) return false; - if (DisableComplexAddrModes) - return false; - - // For now we support only different base registers. - // TODO: enable others. - if (DifferentField != ExtAddrMode::BaseRegField) + if (!addrModeCombiningAllowed()) return false; // Build a map between to @@ -2848,7 +2914,7 @@ public: Value *CommonValue = findCommon(Map); if (CommonValue) - AddrModes[0].BaseReg = CommonValue; + AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes); return CommonValue != nullptr; } @@ -2862,14 +2928,13 @@ private: // Keep track of keys where the value is null. We will need to replace it // with constant null when we know the common type. SmallVector NullValue; + Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType()); for (auto &AM : AddrModes) { BasicBlock *BB = nullptr; if (Instruction *I = dyn_cast(AM.OriginalValue)) BB = I->getParent(); - // For now we support only base register as different field. - // TODO: Enable others. - Value *DV = AM.BaseReg; + Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy); if (DV) { if (CommonType) assert(CommonType == DV->getType() && "Different types detected!"); @@ -3182,6 +3247,23 @@ private: } } } + + bool addrModeCombiningAllowed() { + if (DisableComplexAddrModes) + return false; + switch (DifferentField) { + default: + return false; + case ExtAddrMode::BaseRegField: + return AddrSinkCombineBaseReg; + case ExtAddrMode::BaseGVField: + return AddrSinkCombineBaseGV; + case ExtAddrMode::BaseOffsField: + return AddrSinkCombineBaseOffs; + case ExtAddrMode::ScaledRegField: + return AddrSinkCombineScaledReg; + } + } }; } // end anonymous namespace diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll index 06a513543c45..0be9c556f934 100644 --- a/llvm/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll @@ -1,7 +1,194 @@ -; RUN: opt -S -codegenprepare -mtriple=thumbv7m -disable-complex-addr-modes=false -addr-sink-new-select=true < %s | FileCheck %s +; RUN: opt -S -codegenprepare -mtriple=thumbv7m -disable-complex-addr-modes=false -addr-sink-new-select=true -addr-sink-new-phis=true < %s | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +@gv1 = common global i32 0, align 4 +@gv2 = common global i32 0, align 4 + +; Phi selects between ptr and gep with ptr as base and constant offset +define void @test_phi_onegep_offset(i32* %ptr, i32 %value) { +; CHECK-LABEL: @test_phi_onegep_offset +; CHECK-NOT: phi i32* [ %ptr, %entry ], [ %gep, %if.then ] +; CHECK: phi i32 [ 4, %if.then ], [ 0, %entry ] +entry: + %cmp = icmp sgt i32 %value, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %gep = getelementptr inbounds i32, i32* %ptr, i32 1 + br label %if.end + +if.end: + %phi = phi i32* [ %ptr, %entry ], [ %gep, %if.then ] + store i32 %value, i32* %phi, align 4 + ret void +} + +; Phi selects between two geps with same base, different constant offsets +define void @test_phi_twogep_offset(i32* %ptr, i32 %value) { +; CHECK-LABEL: @test_phi_twogep_offset +; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] +; CHECK: phi i32 [ 8, %if.else ], [ 4, %if.then ] +entry: + %cmp = icmp sgt i32 %value, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1 + br label %if.end + +if.else: + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2 + br label %if.end + +if.end: + %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] + store i32 %value, i32* %phi, align 4 + ret void +} + +; Phi selects between ptr and gep with ptr as base and nonconstant offset +define void @test_phi_onegep_nonconst_offset(i32* %ptr, i32 %value, i32 %off) { +; CHECK-LABEL: @test_phi_onegep_nonconst_offset +; CHECK-NOT: phi i32* [ %ptr, %entry ], [ %gep, %if.then ] +; CHECK: phi i32 [ %off, %if.then ], [ 0, %entry ] +entry: + %cmp = icmp sgt i32 %value, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %gep = getelementptr inbounds i32, i32* %ptr, i32 %off + br label %if.end + +if.end: + %phi = phi i32* [ %ptr, %entry ], [ %gep, %if.then ] + store i32 %value, i32* %phi, align 4 + ret void +} + +; Phi selects between two geps with same base, different nonconstant offsets +define void @test_phi_twogep_nonconst_offset(i32* %ptr, i32 %value, i32 %off1, i32 %off2) { +; CHECK-LABEL: @test_phi_twogep_nonconst_offset +; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] +; CHECK: phi i32 [ %off2, %if.else ], [ %off1, %if.then ] +entry: + %cmp = icmp sgt i32 %value, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 %off1 + br label %if.end + +if.else: + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 %off2 + br label %if.end + +if.end: + %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] + store i32 %value, i32* %phi, align 4 + ret void +} + +; Phi selects between two geps with different base, same constant offset +define void @test_phi_twogep_base(i32* %ptr1, i32* %ptr2, i32 %value) { +; CHECK-LABEL: @test_phi_twogep_base +; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] +; CHECK: phi i32* [ %ptr2, %if.else ], [ %ptr1, %if.then ] +entry: + %cmp = icmp sgt i32 %value, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %gep1 = getelementptr inbounds i32, i32* %ptr1, i32 1 + br label %if.end + +if.else: + %gep2 = getelementptr inbounds i32, i32* %ptr2, i32 1 + br label %if.end + +if.end: + %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] + store i32 %value, i32* %phi, align 4 + ret void +} + +; Phi selects between two geps with different base global variables, same constant offset +define void @test_phi_twogep_base_gv(i32 %value) { +; CHECK-LABEL: @test_phi_twogep_base_gv +; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] +; CHECK: phi i32* [ @gv2, %if.else ], [ @gv1, %if.then ] +entry: + %cmp = icmp sgt i32 %value, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %gep1 = getelementptr inbounds i32, i32* @gv1, i32 1 + br label %if.end + +if.else: + %gep2 = getelementptr inbounds i32, i32* @gv2, i32 1 + br label %if.end + +if.end: + %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] + store i32 %value, i32* %phi, align 4 + ret void +} + +; Phi selects between ptr and gep with ptr as base and constant offset +define void @test_select_onegep_offset(i32* %ptr, i32 %value) { +; CHECK-LABEL: @test_select_onegep_offset +; CHECK-NOT: select i1 %cmp, i32* %ptr, i32* %gep +; CHECK: select i1 %cmp, i32 0, i32 4 +entry: + %cmp = icmp sgt i32 %value, 0 + %gep = getelementptr inbounds i32, i32* %ptr, i32 1 + %select = select i1 %cmp, i32* %ptr, i32* %gep + store i32 %value, i32* %select, align 4 + ret void +} + +; Select between two geps with same base, different constant offsets +define void @test_select_twogep_offset(i32* %ptr, i32 %value) { +; CHECK-LABEL: @test_select_twogep_offset +; CHECK-NOT: select i1 %cmp, i32* %gep1, i32* %gep2 +; CHECK: select i1 %cmp, i32 4, i32 8 +entry: + %cmp = icmp sgt i32 %value, 0 + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1 + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2 + %select = select i1 %cmp, i32* %gep1, i32* %gep2 + store i32 %value, i32* %select, align 4 + ret void +} + +; Select between ptr and gep with ptr as base and nonconstant offset +define void @test_select_onegep_nonconst_offset(i32* %ptr, i32 %value, i32 %off) { +; CHECK-LABEL: @test_select_onegep_nonconst_offset +; CHECK-NOT: select i1 %cmp, i32* %ptr, i32* %gep +; CHECK: select i1 %cmp, i32 0, i32 %off +entry: + %cmp = icmp sgt i32 %value, 0 + %gep = getelementptr inbounds i32, i32* %ptr, i32 %off + %select = select i1 %cmp, i32* %ptr, i32* %gep + store i32 %value, i32* %select, align 4 + ret void +} + +; Select between two geps with same base, different nonconstant offsets +define void @test_select_twogep_nonconst_offset(i32* %ptr, i32 %value, i32 %off1, i32 %off2) { +; CHECK-LABEL: @test_select_twogep_nonconst_offset +; CHECK-NOT: select i1 %cmp, i32* %gep1, i32* %gep2 +; CHECK: select i1 %cmp, i32 %off1, i32 %off2 +entry: + %cmp = icmp sgt i32 %value, 0 + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 %off1 + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 %off2 + %select = select i1 %cmp, i32* %gep1, i32* %gep2 + store i32 %value, i32* %select, align 4 + ret void +} + ; Select between two geps with different base, same constant offset define void @test_select_twogep_base(i32* %ptr1, i32* %ptr2, i32 %value) { ; CHECK-LABEL: @test_select_twogep_base @@ -16,3 +203,166 @@ entry: ret void } +; Select between two geps with different base global variables, same constant offset +define void @test_select_twogep_base_gv(i32 %value) { +; CHECK-LABEL: @test_select_twogep_base_gv +; CHECK-NOT: select i1 %cmp, i32* %gep1, i32* %gep2 +; CHECK: select i1 %cmp, i32* @gv1, i32* @gv2 +entry: + %cmp = icmp sgt i32 %value, 0 + %gep1 = getelementptr inbounds i32, i32* @gv1, i32 1 + %gep2 = getelementptr inbounds i32, i32* @gv2, i32 1 + %select = select i1 %cmp, i32* %gep1, i32* %gep2 + store i32 %value, i32* %select, align 4 + ret void +} + +; If the phi is in a different block to where the gep will be, the phi goes where +; the original phi was not where the gep is. +; CHECK-LABEL: @test_phi_different_block +; CHECK-LABEL: if1.end +; CHECK-NOT: phi i32* [ %ptr, %entry ], [ %gep, %if1.then ] +; CHECK: phi i32 [ 4, %if1.then ], [ 0, %entry ] +define void @test_phi_different_block(i32* %ptr, i32 %value1, i32 %value2) { +entry: + %cmp1 = icmp sgt i32 %value1, 0 + br i1 %cmp1, label %if1.then, label %if1.end + +if1.then: + %gep = getelementptr inbounds i32, i32* %ptr, i32 1 + br label %if1.end + +if1.end: + %phi = phi i32* [ %ptr, %entry ], [ %gep, %if1.then ] + %cmp2 = icmp sgt i32 %value2, 0 + br i1 %cmp2, label %if2.then, label %if2.end + +if2.then: + store i32 %value1, i32* %ptr, align 4 + br label %if2.end + +if2.end: + store i32 %value2, i32* %phi, align 4 + ret void +} + +; A phi with three incoming values should be optimised +; CHECK-LABEL: @test_phi_threegep +; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else.then ], [ %gep3, %if.else.else ] +; CHECK: phi i32 [ 12, %if.else.else ], [ 8, %if.else.then ], [ 4, %if.then ] +define void @test_phi_threegep(i32* %ptr, i32 %value1, i32 %value2) { +entry: + %cmp1 = icmp sgt i32 %value1, 0 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1 + br label %if.end + +if.else: + %cmp2 = icmp sgt i32 %value2, 0 + br i1 %cmp2, label %if.else.then, label %if.else.else + +if.else.then: + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2 + br label %if.end + +if.else.else: + %gep3 = getelementptr inbounds i32, i32* %ptr, i32 3 + br label %if.end + +if.end: + %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else.then ], [ %gep3, %if.else.else ] + store i32 %value1, i32* %phi, align 4 + ret void +} + +; A phi with two incoming values but three geps due to nesting should be +; optimised +; CHECK-LABEL: @test_phi_threegep_nested +; CHECK: %[[PHI:[a-z0-9_]+]] = phi i32 [ 12, %if.else.else ], [ 8, %if.else.then ] +; CHECK: phi i32 [ %[[PHI]], %if.else.end ], [ 4, %if.then ] +define void @test_phi_threegep_nested(i32* %ptr, i32 %value1, i32 %value2) { +entry: + %cmp1 = icmp sgt i32 %value1, 0 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1 + br label %if.end + +if.else: + %cmp2 = icmp sgt i32 %value2, 0 + br i1 %cmp2, label %if.else.then, label %if.else.else + +if.else.then: + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2 + br label %if.else.end + +if.else.else: + %gep3 = getelementptr inbounds i32, i32* %ptr, i32 3 + br label %if.else.end + +if.else.end: + %gep4 = phi i32* [ %gep2, %if.else.then ], [ %gep3, %if.else.else ] + store i32 %value2, i32* %ptr, align 4 + br label %if.end + +if.end: + %phi = phi i32* [ %gep1, %if.then ], [ %gep4, %if.else.end ] + store i32 %value1, i32* %phi, align 4 + ret void +} + +; A nested select is expected to be optimised +; CHECK-LABEL: @test_nested_select +; CHECK: %[[SELECT:[a-z0-9_]+]] = select i1 %cmp2, i32 4, i32 8 +; CHECK: select i1 %cmp1, i32 4, i32 %[[SELECT]] +define void @test_nested_select(i32* %ptr, i32 %value1, i32 %value2) { +entry: + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1 + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2 + %cmp1 = icmp sgt i32 %value1, 0 + %cmp2 = icmp sgt i32 %value2, 0 + %select1 = select i1 %cmp2, i32* %gep1, i32* %gep2 + %select2 = select i1 %cmp1, i32* %gep1, i32* %select1 + store i32 %value1, i32* %select2, align 4 + ret void +} + +; Scaling the offset by a different amount is expected not to be optimised +; CHECK-LABEL: @test_select_different_scale +; CHECK: select i1 %cmp, i32* %gep1, i32* %castgep +define void @test_select_different_scale(i32* %ptr, i32 %value, i32 %off) { +entry: + %cmp = icmp sgt i32 %value, 0 + %castptr = bitcast i32* %ptr to i16* + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 %off + %gep2 = getelementptr inbounds i16, i16* %castptr, i32 %off + %castgep = bitcast i16* %gep2 to i32* + %select = select i1 %cmp, i32* %gep1, i32* %castgep + store i32 %value, i32* %select, align 4 + ret void +} + +; A select between two values is already the best we can do +; CHECK-LABEL: @test_select_trivial +; CHECK: select i1 %cmp, i32* %ptr1, i32* %ptr2 +define void @test_select_trivial(i32* %ptr1, i32* %ptr2, i32 %value) { +entey: + %cmp = icmp sgt i32 %value, 0 + %select = select i1 %cmp, i32* %ptr1, i32* %ptr2 + store i32 %value, i32* %select, align 4 + ret void +} + +; A select between two global variables is already the best we can do +; CHECK-LABEL: @test_select_trivial_gv +; CHECK: select i1 %cmp, i32* @gv1, i32* @gv2 +define void @test_select_trivial_gv(i32 %value) { +entey: + %cmp = icmp sgt i32 %value, 0 + %select = select i1 %cmp, i32* @gv1, i32* @gv2 + store i32 %value, i32* %select, align 4 + ret void +}