From c4c6b548c5bdd8ca20ff31e061e1370f1c0a4de8 Mon Sep 17 00:00:00 2001 From: Guozhi Wei Date: Tue, 5 Jun 2018 21:03:52 +0000 Subject: [PATCH] [CodeGenPrepare] Move Extension Instructions Through Logical And Shift Instructions The CodeGenPrepare pass moves extension instructions close to load instructions in a different BB, so they can be combined later. But the extension instructions can't move through logical and shift instructions in the current implementation. This patch enables this enhancement, so we can eliminate more extension instructions. Differential Revision: https://reviews.llvm.org/D45537 This is a re-commit of r331783, which was reverted by r333305. The performance regression was caused by some unlucky alignment, not a code generation problem. llvm-svn: 334049 --- llvm/lib/CodeGen/CodeGenPrepare.cpp | 41 ++++++ llvm/test/CodeGen/X86/cmov.ll | 5 +- .../test/CodeGen/X86/ins_subreg_coalesce-1.ll | 2 +- .../CodeGen/X86/zext-logicop-shift-load.ll | 16 --- .../CodeGenPrepare/X86/ext-logicop.ll | 128 ++++++++++++++++++ 5 files changed, 172 insertions(+), 20 deletions(-) create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/ext-logicop.ll diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 8297260cb1e9..faae4a836bb5 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -3390,6 +3390,47 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, (IsSExt && BinOp->hasNoSignedWrap()))) return true; + // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst)) + if ((Inst->getOpcode() == Instruction::And || + Inst->getOpcode() == Instruction::Or)) + return true; + + // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst)) + if (Inst->getOpcode() == Instruction::Xor) { + const ConstantInt *Cst = dyn_cast(Inst->getOperand(1)); + // Make sure it is not a NOT. 
+ if (Cst && !Cst->getValue().isAllOnesValue()) + return true; + } + + // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst)) + // It may change a poisoned value into a regular value, like + // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12 + // poisoned value regular value + // It should be OK since undef covers valid value. + if (Inst->getOpcode() == Instruction::LShr && !IsSExt) + return true; + + // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst) + // It may change a poisoned value into a regular value, like + // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12 + // poisoned value regular value + // It should be OK since undef covers valid value. + if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) { + const Instruction *ExtInst = + dyn_cast(*Inst->user_begin()); + if (ExtInst->hasOneUse()) { + const Instruction *AndInst = + dyn_cast(*ExtInst->user_begin()); + if (AndInst && AndInst->getOpcode() == Instruction::And) { + const ConstantInt *Cst = dyn_cast(AndInst->getOperand(1)); + if (Cst && + Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth())) + return true; + } + } + } + // Check if we can do the following simplification. 
// ext(trunc(opnd)) --> ext(opnd) if (!isa(Inst)) diff --git a/llvm/test/CodeGen/X86/cmov.ll b/llvm/test/CodeGen/X86/cmov.ll index 859078a1f728..fb921ee7c27d 100644 --- a/llvm/test/CodeGen/X86/cmov.ll +++ b/llvm/test/CodeGen/X86/cmov.ll @@ -79,9 +79,8 @@ define i1 @test4() nounwind { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movsbl {{.*}}(%rip), %edx -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: shrb $7, %al -; CHECK-NEXT: movzbl %al, %ecx +; CHECK-NEXT: movzbl %dl, %ecx +; CHECK-NEXT: shrl $7, %ecx ; CHECK-NEXT: xorl $1, %ecx ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: sarl %cl, %edx diff --git a/llvm/test/CodeGen/X86/ins_subreg_coalesce-1.ll b/llvm/test/CodeGen/X86/ins_subreg_coalesce-1.ll index e88b3a579c03..3112e1ab4be7 100644 --- a/llvm/test/CodeGen/X86/ins_subreg_coalesce-1.ll +++ b/llvm/test/CodeGen/X86/ins_subreg_coalesce-1.ll @@ -4,7 +4,7 @@ define fastcc i32 @t() nounwind { ; CHECK-LABEL: t: ; CHECK: # %bb.0: # %walkExprTree.exit -; CHECK-NEXT: movl 0, %eax +; CHECK-NEXT: movzwl 0, %eax ; CHECK-NEXT: orl $2, %eax ; CHECK-NEXT: movw %ax, 0 ; CHECK-NEXT: shrl $3, %eax diff --git a/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll b/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll index 73380eb5ee7f..26182fe3e4cd 100644 --- a/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll +++ b/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll @@ -88,22 +88,6 @@ entry: ret i64 %1 } -; Don't do the folding if the other operand isn't a constant. -define i64 @test7(i8* %data, i8 %logop) { -; CHECK-LABEL: test7: -; CHECK: movb -; CHECK-NEXT: shrb -; CHECK-NEXT: orb -; CHECK-NEXT: movzbl -; CHECK-NEXT: retq -entry: - %bf.load = load i8, i8* %data, align 4 - %bf.clear = lshr i8 %bf.load, 2 - %0 = or i8 %bf.clear, %logop - %1 = zext i8 %0 to i64 - ret i64 %1 -} - ; Load is folded with sext. 
define i64 @test8(i8* %data) { ; CHECK-LABEL: test8: diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/ext-logicop.ll b/llvm/test/Transforms/CodeGenPrepare/X86/ext-logicop.ll new file mode 100644 index 000000000000..51d1e0ab676e --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/ext-logicop.ll @@ -0,0 +1,128 @@ +; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck %s + + +@a = global [10 x i8] zeroinitializer, align 1 +declare void @foo() + +; ext(and(ld, cst)) -> and(ext(ld), ext(cst)) +define void @test1(i32* %p, i32 %ll) { +; CHECK-LABEL: @test1 +; CHECK-NEXT: entry: +; CHECK-NEXT: load +; CHECK-NEXT: zext +; CHECK-NEXT: and +entry: + %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1 + %and = and i8 %tmp, 60 + %cmp = icmp ugt i8 %and, 20 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %conv2 = zext i8 %and to i32 + %add = add nsw i32 %conv2, %ll + store i32 %add, i32* %p, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + tail call void @foo() + ret void +} + +; ext(or(ld, cst)) -> or(ext(ld), ext(cst)) +define void @test2(i32* %p, i32 %ll) { +; CHECK-LABEL: @test2 +; CHECK-NEXT: entry: +; CHECK-NEXT: load +; CHECK-NEXT: zext +; CHECK-NEXT: or +entry: + %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1 + %or = or i8 %tmp, 60 + %cmp = icmp ugt i8 %or, 20 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %conv2 = zext i8 %or to i32 + %add = add nsw i32 %conv2, %ll + store i32 %add, i32* %p, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + tail call void @foo() + ret void +} + +; ext(and(shl(ld, cst), cst)) -> and(shl(ext(ld), ext(cst)), ext(cst)) +define void @test3(i32* %p, i32 %ll) { +; CHECK-LABEL: @test3 +; CHECK-NEXT: entry: +; CHECK-NEXT: load +; CHECK-NEXT: zext +; CHECK-NEXT: shl +; CHECK-NEXT: and +entry: + %tmp = load i8, i8* getelementptr inbounds 
([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1 + %shl = shl i8 %tmp, 2 + %and = and i8 %shl, 60 + %cmp = icmp ugt i8 %and, 20 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %conv2 = zext i8 %and to i32 + %add = add nsw i32 %conv2, %ll + store i32 %add, i32* %p, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + tail call void @foo() + ret void +} + +; zext(shrl(ld, cst)) -> shrl(zext(ld), zext(cst)) +define void @test4(i32* %p, i32 %ll) { +; CHECK-LABEL: @test4 +; CHECK-NEXT: entry: +; CHECK-NEXT: load +; CHECK-NEXT: zext +; CHECK-NEXT: lshr +entry: + %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1 + %lshr = lshr i8 %tmp, 2 + %cmp = icmp ugt i8 %lshr, 20 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %conv2 = zext i8 %lshr to i32 + %add = add nsw i32 %conv2, %ll + store i32 %add, i32* %p, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + tail call void @foo() + ret void +} + +; ext(xor(ld, cst)) -> xor(ext(ld), ext(cst)) +define void @test5(i32* %p, i32 %ll) { +; CHECK-LABEL: @test5 +; CHECK-NEXT: entry: +; CHECK-NEXT: load +; CHECK-NEXT: zext +; CHECK-NEXT: xor +entry: + %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1 + %xor = xor i8 %tmp, 60 + %cmp = icmp ugt i8 %xor, 20 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %conv2 = zext i8 %xor to i32 + %add = add nsw i32 %conv2, %ll + store i32 %add, i32* %p, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + tail call void @foo() + ret void +} +