diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-shortLoops.ll b/llvm/test/CodeGen/PowerPC/ctrloop-shortLoops.ll
index c1ba9214fdaf..ed9ec4300de9 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-shortLoops.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-shortLoops.ll
@@ -1,10 +1,15 @@
-; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -mcpu=pwr8 | FileCheck %s
-; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -mcpu=a2q | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -mcpu=pwr8 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR8
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -mcpu=a2q | FileCheck %s --check-prefixes=CHECK,CHECK-A2Q
 
 ; Verify that we do NOT generate the mtctr instruction for loop trip counts < 4
 ; The latency of the mtctr is only justified if there are more than 4 comparisons that are removed as a result.
 
 @a = common local_unnamed_addr global i32 0, align 4
+@b = common local_unnamed_addr global i32 0, align 4
+@c = common local_unnamed_addr global i32 0, align 4
+@d = common local_unnamed_addr global i32 0, align 4
+@e = common local_unnamed_addr global i32 0, align 4
+@f = common local_unnamed_addr global i32 0, align 4
 @arr = common local_unnamed_addr global [5 x i32] zeroinitializer, align 4
 
 ; Function Attrs: norecurse nounwind readonly
@@ -113,3 +118,47 @@ for.end: ; preds = %if.end
   ret i32 %conv
 }
 
+; On core a2q, IssueWidth is 1. On core pwr8, IssueWidth is 8.
+; a2q should use mtctr, but pwr8 should not use mtctr. NOTE(review): the pwr8 check on testTripCount5 below expects mtctr, which contradicts this sentence -- confirm which one is intended.
+define signext i32 @testTripCount5() { ; bumps @a..@f by one per loop iteration, then returns 0
+; CHECK-LABEL: testTripCount5:
+; CHECK-PWR8: mtctr
+; CHECK-A2Q: mtctr
+
+entry:                                            ; read the starting value of each global once, before the loop
+  %.prea = load i32, i32* @a, align 4
+  %.preb = load i32, i32* @b, align 4
+  %.prec = load i32, i32* @c, align 4
+  %.pred = load i32, i32* @d, align 4
+  %.pree = load i32, i32* @e, align 4
+  %.pref = load i32, i32* @f, align 4
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 2, %entry ], [ %indvars.iv.next, %for.body ] ; down-counter: takes the values 2, 1, 0
+  %0 = phi i32 [ %.prea, %entry ], [ %6, %for.body ] ; %0..%5 carry the running values of @a..@f across iterations
+  %1 = phi i32 [ %.preb, %entry ], [ %7, %for.body ]
+  %2 = phi i32 [ %.prec, %entry ], [ %8, %for.body ]
+  %3 = phi i32 [ %.pred, %entry ], [ %9, %for.body ]
+  %4 = phi i32 [ %.pree, %entry ], [ %10, %for.body ]
+  %5 = phi i32 [ %.pref, %entry ], [ %11, %for.body ]
+  %6 = add i32 %0, 1 ; six independent increments make up the loop body
+  %7 = add i32 %1, 1
+  %8 = add i32 %2, 1
+  %9 = add i32 %3, 1
+  %10 = add i32 %4, 1
+  %11 = add i32 %5, 1
+  %indvars.iv.next = add nsw i64 %indvars.iv, -1
+  %tobool = icmp eq i64 %indvars.iv, 0 ; tests the value before the decrement, so the body runs 3 times; NOTE(review): the function name suggests 5 -- confirm
+  br i1 %tobool, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  store i32 %6, i32* @a, align 4 ; write the final incremented values back to the globals
+  store i32 %7, i32* @b, align 4
+  store i32 %8, i32* @c, align 4
+  store i32 %9, i32* @d, align 4
+  store i32 %10, i32* @e, align 4
+  store i32 %11, i32* @f, align 4
+  ret i32 0
+}
+