From 5a594c28315d8b458e626aa2d88de7c1e1b96689 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Fri, 15 Jul 2022 13:46:42 +0100 Subject: [PATCH] [AArch64][SVE] NFC: Add test-case to sve-ptest-removal-cmp* tests This also adds new sve-ptest tests for FP compares that will retain the ptest. This also includes a few other NFC changes: * Added type mangling to ptest.any intrinsic. * Regenerated asm using update_llc_tests script. --- .../AArch64/sve-ptest-removal-cmpeq.ll | 65 +++++++++---- .../AArch64/sve-ptest-removal-cmpeq.mir | 3 + .../AArch64/sve-ptest-removal-cmpge.ll | 65 +++++++++---- .../AArch64/sve-ptest-removal-cmpgt.ll | 66 +++++++++---- .../AArch64/sve-ptest-removal-cmphi.ll | 66 +++++++++---- .../AArch64/sve-ptest-removal-cmphs.ll | 65 +++++++++---- .../AArch64/sve-ptest-removal-cmple.ll | 29 +++--- .../AArch64/sve-ptest-removal-cmplo.ll | 29 +++--- .../AArch64/sve-ptest-removal-cmpls.ll | 29 +++--- .../AArch64/sve-ptest-removal-cmplt.ll | 29 +++--- .../AArch64/sve-ptest-removal-cmpne.ll | 65 +++++++++---- llvm/test/CodeGen/AArch64/sve-ptest.ll | 95 +++++++++++++++++++ 12 files changed, 432 insertions(+), 174 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-ptest.ll diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll index 75b518265d40..48c1255e239c 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s ; @@ -6,11 +7,27 @@ define i32 @cmpeq_nxv16i8( %pg, %a, %b) { ; CHECK-LABEL: cmpeq_nxv16i8: -; CHECK: cmpeq p0.b, p0/z, z0.b, z1.b -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpeq.nxv16i8( %pg, %a, %b) - %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +define i32 @cmpeq_nxv4i32( %pg, %a, %b) { +; CHECK-LABEL: cmpeq_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b +; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmpeq.nxv4i32( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1( %pg, %1) %conv = zext i1 %2 to i32 ret i32 %conv } @@ -21,12 +38,13 @@ define i32 @cmpeq_nxv16i8( %pg, %a, %pg, %a) { ; CHECK-LABEL: cmpeq_imm_nxv16i8: -; CHECK: cmpeq p0.b, p0/z, z0.b, #0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpeq.nxv16i8( %pg, %a, zeroinitializer) %2 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %3 = tail call i1 @llvm.aarch64.sve.ptest.any( %2, %1) + %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %2, %1) %conv = zext i1 %3 to i32 ret i32 %conv } @@ -37,47 +55,52 @@ define i32 @cmpeq_imm_nxv16i8( %pg, %a) { define i32 @cmpeq_wide_nxv16i8( %pg, %a, %b) { ; CHECK-LABEL: cmpeq_wide_nxv16i8: -; CHECK: cmpeq p0.b, p0/z, z0.b, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpeq.wide.nxv16i8( %pg, %a, %b) - %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %1) %conv = zext i1 %2 to i32 ret i32 %conv } define i32 @cmpeq_wide_nxv8i16( %pg, %a, %b) { ; CHECK-LABEL: cmpeq_wide_nxv8i16: -; CHECK: cmpeq p0.h, p0/z, z0.h, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) %2 = tail call @llvm.aarch64.sve.cmpeq.wide.nxv8i16( %1, %a, %b) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %2) - %4 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %3) + %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %3) %conv = zext i1 %4 to i32 ret i32 %conv } define i32 @cmpeq_wide_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmpeq_wide_nxv4i32: -; CHECK: cmpeq p0.s, p0/z, z0.s, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) %2 = tail call @llvm.aarch64.sve.cmpeq.wide.nxv4i32( %1, %a, %b) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %2) - %4 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %3) + %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %3) %conv = zext i1 %4 to i32 ret i32 %conv } +declare @llvm.aarch64.sve.cmpeq.nxv4i32(, , ) declare @llvm.aarch64.sve.cmpeq.nxv16i8(, , ) declare @llvm.aarch64.sve.cmpeq.wide.nxv16i8(, , ) declare @llvm.aarch64.sve.cmpeq.wide.nxv8i16(, , ) declare @llvm.aarch64.sve.cmpeq.wide.nxv4i32(, , ) -declare i1 @llvm.aarch64.sve.ptest.any(, ) +declare i1 @llvm.aarch64.sve.ptest.any.nxv4i1(, ) +declare i1 @llvm.aarch64.sve.ptest.any.nxv16i1(, ) declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir index b41411121e4c..5df55777995a 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py # RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s # Test instruction sequences where PTEST is redundant and thus gets removed. @@ -532,3 +533,5 @@ body: | RET_ReallyLR implicit $w0 ... +## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +# CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll index 25ab93ee4bf6..77ee75efed24 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s ; @@ -6,11 +7,27 @@ define i32 @cmpge_nxv16i8( %pg, %a, %b) { ; CHECK-LABEL: cmpge_nxv16i8: -; CHECK: cmpge p0.b, p0/z, z0.b, z1.b -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( %pg, %a, %b) - %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +define i32 @cmpge_nxv4i32( %pg, %a, %b) { +; CHECK-LABEL: cmpge_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b +; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmpge.nxv4i32( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1( %pg, %1) %conv = zext i1 %2 to i32 ret i32 %conv } @@ -21,12 +38,13 @@ define i32 @cmpge_nxv16i8( %pg, %a, %pg, %a) { ; CHECK-LABEL: cmpge_imm_nxv16i8: -; CHECK: cmpge p0.b, p0/z, z0.b, #0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( %pg, %a, zeroinitializer) %2 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %3 = tail call i1 @llvm.aarch64.sve.ptest.any( %2, %1) + %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %2, %1) %conv = zext i1 %3 to i32 ret i32 %conv } @@ -37,47 +55,52 @@ define i32 @cmpge_imm_nxv16i8( %pg, %a) { define i32 @cmpge_wide_nxv16i8( %pg, %a, %b) { ; CHECK-LABEL: cmpge_wide_nxv16i8: -; CHECK: cmpge p0.b, p0/z, z0.b, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpge.wide.nxv16i8( %pg, %a, %b) - %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %1) %conv = zext i1 %2 to i32 ret i32 %conv } define i32 @cmpge_wide_nxv8i16( %pg, %a, %b) { ; CHECK-LABEL: cmpge_wide_nxv8i16: -; CHECK: cmpge p0.h, p0/z, z0.h, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpge p0.h, p0/z, z0.h, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) %2 = tail call @llvm.aarch64.sve.cmpge.wide.nxv8i16( %1, %a, %b) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %2) - %4 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %3) + %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %3) %conv = zext i1 %4 to i32 ret i32 %conv } define i32 @cmpge_wide_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmpge_wide_nxv4i32: -; CHECK: cmpge p0.s, p0/z, z0.s, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) %2 = tail call @llvm.aarch64.sve.cmpge.wide.nxv4i32( %1, %a, %b) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %2) - %4 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %3) + %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %3) %conv = zext i1 %4 to i32 ret i32 %conv } +declare @llvm.aarch64.sve.cmpge.nxv4i32(, , ) declare @llvm.aarch64.sve.cmpge.nxv16i8(, , ) declare @llvm.aarch64.sve.cmpge.wide.nxv16i8(, , ) declare @llvm.aarch64.sve.cmpge.wide.nxv8i16(, , ) declare @llvm.aarch64.sve.cmpge.wide.nxv4i32(, , ) -declare i1 @llvm.aarch64.sve.ptest.any(, ) +declare i1 @llvm.aarch64.sve.ptest.any.nxv4i1(, ) +declare i1 @llvm.aarch64.sve.ptest.any.nxv16i1(, ) declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll index 8a565c031205..f8ca4906e304 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s ; @@ -6,11 +7,27 @@ define i32 @cmpgt_nxv16i8( %pg, %a, %b) { ; CHECK-LABEL: cmpgt_nxv16i8: -; CHECK: cmpgt p0.b, p0/z, z0.b, z1.b -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpgt p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpgt.nxv16i8( %pg, %a, %b) - %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +define i32 @cmpgt_nxv4i32( %pg, %a, %b) { +; CHECK-LABEL: cmpgt_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b +; CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmpgt.nxv4i32( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1( %pg, %1) %conv = zext i1 %2 to i32 ret i32 %conv } @@ -21,12 +38,13 @@ define i32 @cmpgt_nxv16i8( %pg, %a, %pg, %a) { ; CHECK-LABEL: cmpgt_imm_nxv16i8: -; CHECK: cmpgt p0.b, p0/z, z0.b, #0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpgt p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpgt.nxv16i8( %pg, %a, zeroinitializer) %2 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %3 = tail call i1 @llvm.aarch64.sve.ptest.any( %2, %1) + %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %2, %1) %conv = zext i1 %3 to i32 ret i32 %conv } @@ -37,48 +55,54 @@ define i32 @cmpgt_imm_nxv16i8( %pg, %a) { define i32 @cmpgt_wide_nxv16i8( %pg, %a, %b) { ; CHECK-LABEL: cmpgt_wide_nxv16i8: -; CHECK: cmpgt p0.b, p0/z, z0.b, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpgt p0.b, p0/z, z0.b, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpgt.wide.nxv16i8( %pg, %a, %b) - %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %1) %conv = zext i1 %2 to i32 ret i32 %conv } define i32 @cmpgt_wide_nxv8i16( %pg, %a, %b) { ; CHECK-LABEL: cmpgt_wide_nxv8i16: -; CHECK: cmpgt p0.h, p0/z, z0.h, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpgt p0.h, p0/z, z0.h, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) %2 = tail call @llvm.aarch64.sve.cmpgt.wide.nxv8i16( %1, %a, %b) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %2) - %4 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %3) + %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %3) %conv = zext i1 %4 to i32 ret i32 %conv } define i32 @cmpgt_wide_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmpgt_wide_nxv4i32: -; CHECK: cmpgt p0.s, p0/z, z0.s, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) %2 = tail call @llvm.aarch64.sve.cmpgt.wide.nxv4i32( %1, %a, %b) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %2) - %4 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %3) + %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %3) %conv = zext i1 %4 to i32 ret i32 %conv } +declare @llvm.aarch64.sve.cmpgt.nxv4i32(, , ) declare @llvm.aarch64.sve.cmpgt.nxv16i8(, , ) declare @llvm.aarch64.sve.cmpgt.wide.nxv16i8(, , ) declare @llvm.aarch64.sve.cmpgt.wide.nxv8i16(, , ) declare @llvm.aarch64.sve.cmpgt.wide.nxv4i32(, , ) -declare i1 @llvm.aarch64.sve.ptest.any(, ) +declare i1 @llvm.aarch64.sve.ptest.any.nxv4i1(, ) +declare i1 @llvm.aarch64.sve.ptest.any.nxv16i1(, ) +declare @llvm.aarch64.sve.ptrue.nxv4i1(i32) declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) declare @llvm.aarch64.sve.convert.to.svbool.nxv8i1() diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll index b749e2421a55..ff9f62784fdd 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s ; @@ -6,27 +7,45 @@ define i32 @cmphi_nxv16i8( %pg, %a, %b) { ; CHECK-LABEL: cmphi_nxv16i8: -; CHECK: cmphi p0.b, p0/z, z0.b, z1.b -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmphi p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmphi.nxv16i8( %pg, %a, %b) - %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %1) %conv = zext i1 %2 to i32 ret i32 %conv } +define i32 @cmphi_nxv4i32( %pg, %a, %b) { +; CHECK-LABEL: cmphi_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b +; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmphi.nxv4i32( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + + ; ; Immediate Compares ; define i32 @cmphi_imm_nxv16i8( %pg, %a) { ; CHECK-LABEL: cmphi_imm_nxv16i8: -; CHECK: cmphi p0.b, p0/z, z0.b, #0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmphi p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmphi.nxv16i8( %pg, %a, zeroinitializer) %2 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %3 = tail call i1 @llvm.aarch64.sve.ptest.any( %2, %1) + %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %2, %1) %conv = zext i1 %3 to i32 ret i32 %conv } @@ -37,47 +56,52 @@ define i32 @cmphi_imm_nxv16i8( %pg, %a) { define i32 @cmphi_wide_nxv16i8( %pg, %a, %b) { ; CHECK-LABEL: cmphi_wide_nxv16i8: -; CHECK: cmphi p0.b, p0/z, z0.b, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmphi p0.b, p0/z, z0.b, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmphi.wide.nxv16i8( %pg, %a, %b) - %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %1) %conv = zext i1 %2 to i32 ret i32 %conv } define i32 @cmphi_wide_nxv8i16( %pg, %a, %b) { ; CHECK-LABEL: cmphi_wide_nxv8i16: -; CHECK: cmphi p0.h, p0/z, z0.h, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmphi p0.h, p0/z, z0.h, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) %2 = tail call @llvm.aarch64.sve.cmphi.wide.nxv8i16( %1, %a, %b) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %2) - %4 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %3) + %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %3) %conv = zext i1 %4 to i32 ret i32 %conv } define i32 @cmphi_wide_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmphi_wide_nxv4i32: -; CHECK: cmphi p0.s, p0/z, z0.s, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) %2 = tail call @llvm.aarch64.sve.cmphi.wide.nxv4i32( %1, %a, %b) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %2) - %4 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %3) + %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %3) %conv = zext i1 %4 to i32 ret i32 %conv } +declare @llvm.aarch64.sve.cmphi.nxv4i32(, , ) declare @llvm.aarch64.sve.cmphi.nxv16i8(, , ) declare @llvm.aarch64.sve.cmphi.wide.nxv16i8(, , ) declare @llvm.aarch64.sve.cmphi.wide.nxv8i16(, , ) declare @llvm.aarch64.sve.cmphi.wide.nxv4i32(, , ) -declare i1 @llvm.aarch64.sve.ptest.any(, ) +declare i1 @llvm.aarch64.sve.ptest.any.nxv4i1(, ) +declare i1 @llvm.aarch64.sve.ptest.any.nxv16i1(, ) declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll index f6d9e70fffe4..7c30aff17201 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s ; @@ -6,11 +7,27 @@ define i32 @cmphs_nxv16i8( %pg, %a, %b) { ; CHECK-LABEL: cmphs_nxv16i8: -; CHECK: cmphs p0.b, p0/z, z0.b, z1.b -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmphs p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmphs.nxv16i8( %pg, %a, %b) - %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +define i32 @cmphs_nxv4i32( %pg, %a, %b) { +; CHECK-LABEL: cmphs_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b +; CHECK-NEXT: cmphs p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmphs.nxv4i32( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1( %pg, %1) %conv = zext i1 %2 to i32 ret i32 %conv } @@ -21,12 +38,13 @@ define i32 @cmphs_nxv16i8( %pg, %a, %pg, %a) { ; CHECK-LABEL: cmphs_imm_nxv16i8: -; CHECK: cmphs p0.b, p0/z, z0.b, #0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmphs p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmphs.nxv16i8( %pg, %a, zeroinitializer) %2 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %3 = tail call i1 @llvm.aarch64.sve.ptest.any( %2, %1) + %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %2, %1) %conv = zext i1 %3 to i32 ret i32 %conv } @@ -37,47 +55,52 @@ define i32 @cmphs_imm_nxv16i8( %pg, %a) { define i32 @cmphs_wide_nxv16i8( %pg, %a, %b) { ; CHECK-LABEL: cmphs_wide_nxv16i8: -; CHECK: cmphs p0.b, p0/z, z0.b, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmphs p0.b, p0/z, z0.b, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmphs.wide.nxv16i8( %pg, %a, %b) - %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %1) %conv = zext i1 %2 to i32 ret i32 %conv } define i32 @cmphs_wide_nxv8i16( %pg, %a, %b) { ; CHECK-LABEL: cmphs_wide_nxv8i16: -; CHECK: cmphs p0.h, p0/z, z0.h, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmphs p0.h, p0/z, z0.h, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) %2 = tail call @llvm.aarch64.sve.cmphs.wide.nxv8i16( %1, %a, %b) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %2) - %4 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %3) + %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %3) %conv = zext i1 %4 to i32 ret i32 %conv } define i32 @cmphs_wide_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmphs_wide_nxv4i32: -; CHECK: cmphs p0.s, p0/z, z0.s, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmphs p0.s, p0/z, z0.s, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) %2 = tail call @llvm.aarch64.sve.cmphs.wide.nxv4i32( %1, %a, %b) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %2) - %4 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %3) + %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %3) %conv = zext i1 %4 to i32 ret i32 %conv } declare @llvm.aarch64.sve.cmphs.nxv16i8(, , ) +declare @llvm.aarch64.sve.cmphs.nxv4i32(, , ) declare @llvm.aarch64.sve.cmphs.wide.nxv16i8(, , ) declare @llvm.aarch64.sve.cmphs.wide.nxv8i16(, , ) declare @llvm.aarch64.sve.cmphs.wide.nxv4i32(, , ) -declare i1 @llvm.aarch64.sve.ptest.any(, ) +declare i1 @llvm.aarch64.sve.ptest.any.nxv4i1(, ) +declare i1 @llvm.aarch64.sve.ptest.any.nxv16i1(, ) declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll index e3616af95ee9..880d94cf3f1b 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s ; @@ -6,9 +7,10 @@ define i32 @cmple_imm_nxv16i8( %pg, %a) { ; CHECK-LABEL: cmple_imm_nxv16i8: -; CHECK: cmple p0.b, p0/z, z0.b, #0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmple p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( %pg, zeroinitializer, %a) %2 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %3 = tail call i1 @llvm.aarch64.sve.ptest.any( %2, %1) @@ -22,9 +24,10 @@ define i32 @cmple_imm_nxv16i8( %pg, %a) { define i32 @cmple_wide_nxv16i8( %pg, %a, %b) { ; CHECK-LABEL: cmple_wide_nxv16i8: -; CHECK: cmple p0.b, p0/z, z0.b, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmple p0.b, p0/z, z0.b, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmple.wide.nxv16i8( %pg, %a, %b) %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) %conv = zext i1 %2 to i32 @@ -33,9 +36,10 @@ define i32 @cmple_wide_nxv16i8( %pg, %a, %pg, %a, %b) { ; CHECK-LABEL: cmple_wide_nxv8i16: -; CHECK: cmple p0.h, p0/z, z0.h, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmple p0.h, p0/z, z0.h, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) %2 = tail call @llvm.aarch64.sve.cmple.wide.nxv8i16( %1, %a, %b) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %2) @@ -46,9 +50,10 @@ define i32 @cmple_wide_nxv8i16( %pg, %a, %pg, %a, %b) { ; CHECK-LABEL: cmple_wide_nxv4i32: -; CHECK: cmple p0.s, p0/z, z0.s, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmple p0.s, p0/z, z0.s, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) %2 = tail call @llvm.aarch64.sve.cmple.wide.nxv4i32( %1, %a, %b) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %2) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll index 5701b8049150..3379e8637193 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s ; @@ -6,9 +7,10 @@ define i32 @cmplo_imm_nxv16i8( %pg, %a) { ; CHECK-LABEL: cmplo_imm_nxv16i8: -; CHECK: cmplo p0.b, p0/z, z0.b, #0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmplo p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmphi.nxv16i8( %pg, zeroinitializer, %a) %2 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %3 = tail call i1 @llvm.aarch64.sve.ptest.any( %2, %1) @@ -22,9 +24,10 @@ define i32 @cmplo_imm_nxv16i8( %pg, %a) { define i32 @cmplo_wide_nxv16i8( %pg, %a, %b) { ; CHECK-LABEL: cmplo_wide_nxv16i8: -; CHECK: cmplo p0.b, p0/z, z0.b, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmplo p0.b, p0/z, z0.b, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmplo.wide.nxv16i8( %pg, %a, %b) %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) %conv = zext i1 %2 to i32 @@ -33,9 +36,10 @@ define i32 @cmplo_wide_nxv16i8( %pg, %a, %pg, %a, %b) { ; CHECK-LABEL: cmplo_wide_nxv8i16: -; CHECK: cmplo p0.h, p0/z, z0.h, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmplo p0.h, p0/z, z0.h, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) %2 = tail call @llvm.aarch64.sve.cmplo.wide.nxv8i16( %1, %a, %b) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %2) @@ -46,9 +50,10 @@ define i32 @cmplo_wide_nxv8i16( %pg, %a, %pg, %a, %b) { ; CHECK-LABEL: cmplo_wide_nxv4i32: -; CHECK: cmplo p0.s, p0/z, z0.s, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmplo p0.s, p0/z, z0.s, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) %2 = tail call @llvm.aarch64.sve.cmplo.wide.nxv4i32( %1, %a, %b) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %2) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll index 5f6d01f7d1f7..dbbabe9e7d0f 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s ; @@ -6,9 +7,10 @@ define i32 @cmpls_imm_nxv16i8( %pg, %a) { ; CHECK-LABEL: cmpls_imm_nxv16i8: -; CHECK: cmpls p0.b, p0/z, z0.b, #0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpls p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmphs.nxv16i8( %pg, zeroinitializer, %a) %2 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %3 = tail call i1 @llvm.aarch64.sve.ptest.any( %2, %1) @@ -22,9 +24,10 @@ define i32 @cmpls_imm_nxv16i8( %pg, %a) { define i32 @cmpls_wide_nxv16i8( %pg, %a, %b) { ; CHECK-LABEL: cmpls_wide_nxv16i8: -; CHECK: cmpls p0.b, p0/z, z0.b, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpls p0.b, p0/z, z0.b, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpls.wide.nxv16i8( %pg, %a, %b) %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) %conv = zext i1 %2 to i32 @@ -33,9 +36,10 @@ define i32 @cmpls_wide_nxv16i8( %pg, %a, %pg, %a, %b) { ; CHECK-LABEL: cmpls_wide_nxv8i16: -; CHECK: cmpls p0.h, p0/z, z0.h, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpls p0.h, p0/z, z0.h, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) %2 = tail call @llvm.aarch64.sve.cmpls.wide.nxv8i16( %1, %a, %b) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %2) @@ -46,9 +50,10 @@ define i32 @cmpls_wide_nxv8i16( %pg, %a, %pg, %a, %b) { ; CHECK-LABEL: cmpls_wide_nxv4i32: -; CHECK: cmpls p0.s, p0/z, z0.s, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpls p0.s, p0/z, z0.s, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) %2 = tail call @llvm.aarch64.sve.cmpls.wide.nxv4i32( %1, %a, %b) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %2) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll index cee219725366..cf15a3572070 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s ; @@ -6,9 +7,10 @@ define i32 @cmplt_imm_nxv16i8( %pg, %a) { ; CHECK-LABEL: cmplt_imm_nxv16i8: -; CHECK: cmplt p0.b, p0/z, z0.b, #0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmplt p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpgt.nxv16i8( %pg, zeroinitializer, %a) %2 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %3 = tail call i1 @llvm.aarch64.sve.ptest.any( %2, %1) @@ -22,9 +24,10 @@ define i32 @cmplt_imm_nxv16i8( %pg, %a) { define i32 @cmplt_wide_nxv16i8( %pg, %a, %b) { ; CHECK-LABEL: cmplt_wide_nxv16i8: -; CHECK: cmplt p0.b, p0/z, z0.b, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmplt p0.b, p0/z, z0.b, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmplt.wide.nxv16i8( %pg, %a, %b) %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) %conv = zext i1 %2 to i32 @@ -33,9 +36,10 @@ define i32 @cmplt_wide_nxv16i8( %pg, %a, %pg, %a, %b) { ; CHECK-LABEL: cmplt_wide_nxv8i16: -; CHECK: cmplt p0.h, p0/z, z0.h, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmplt p0.h, p0/z, z0.h, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) %2 = tail call @llvm.aarch64.sve.cmplt.wide.nxv8i16( %1, %a, %b) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %2) @@ -46,9 +50,10 @@ define i32 @cmplt_wide_nxv8i16( %pg, %a, %pg, %a, %b) { ; CHECK-LABEL: cmplt_wide_nxv4i32: -; CHECK: cmplt p0.s, p0/z, z0.s, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmplt p0.s, p0/z, z0.s, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) %2 = tail call @llvm.aarch64.sve.cmplt.wide.nxv4i32( %1, %a, %b) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %2) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll index 0609d066fef5..a95a9a09b4b2 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s ; @@ -6,11 +7,27 @@ define i32 @cmpne_nxv16i8( %pg, %a, %b) { ; CHECK-LABEL: cmpne_nxv16i8: -; CHECK: cmpne p0.b, p0/z, z0.b, z1.b -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpne.nxv16i8( %pg, %a, %b) - %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +define i32 @cmpne_nxv4i32( %pg, %a, %b) { +; CHECK-LABEL: cmpne_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b +; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.cmpne.nxv4i32( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1( %pg, %1) %conv = zext i1 %2 to i32 ret i32 %conv } @@ -21,12 +38,13 @@ define i32 @cmpne_nxv16i8( %pg, %a, %pg, %a) { ; CHECK-LABEL: cmpne_imm_nxv16i8: -; CHECK: cmpne p0.b, p0/z, z0.b, #0 -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpne.nxv16i8( %pg, %a, zeroinitializer) %2 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) - %3 = tail call i1 @llvm.aarch64.sve.ptest.any( %2, %1) + %3 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %2, %1) %conv = zext i1 %3 to i32 ret i32 %conv } @@ -37,47 +55,52 @@ define i32 @cmpne_imm_nxv16i8( %pg, %a) { define i32 @cmpne_wide_nxv16i8( %pg, %a, %b) { ; CHECK-LABEL: cmpne_wide_nxv16i8: -; CHECK: cmpne p0.b, p0/z, z0.b, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpne.wide.nxv16i8( %pg, %a, %b) - %2 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %1) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %1) %conv = zext i1 %2 to i32 ret i32 %conv } define i32 @cmpne_wide_nxv8i16( %pg, %a, %b) { ; CHECK-LABEL: cmpne_wide_nxv8i16: -; CHECK: cmpne p0.h, p0/z, z0.h, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) %2 = tail call @llvm.aarch64.sve.cmpne.wide.nxv8i16( %1, %a, %b) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %2) - %4 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %3) + %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %3) %conv = zext i1 %4 to i32 ret i32 %conv } define i32 @cmpne_wide_nxv4i32( %pg, %a, %b) { ; CHECK-LABEL: cmpne_wide_nxv4i32: -; CHECK: cmpne p0.s, p0/z, z0.s, z1.d -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z1.d +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) %2 = tail call @llvm.aarch64.sve.cmpne.wide.nxv4i32( %1, %a, %b) %3 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %2) - %4 = tail call i1 @llvm.aarch64.sve.ptest.any( %pg, %3) + %4 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %3) %conv = zext i1 %4 to i32 ret i32 %conv } +declare @llvm.aarch64.sve.cmpne.nxv4i32(, , ) declare @llvm.aarch64.sve.cmpne.nxv16i8(, , ) declare @llvm.aarch64.sve.cmpne.wide.nxv16i8(, , ) declare @llvm.aarch64.sve.cmpne.wide.nxv8i16(, , ) declare @llvm.aarch64.sve.cmpne.wide.nxv4i32(, , ) -declare i1 @llvm.aarch64.sve.ptest.any(, ) +declare i1 @llvm.aarch64.sve.ptest.any.nxv4i1(, ) +declare i1 @llvm.aarch64.sve.ptest.any.nxv16i1(, ) declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest.ll b/llvm/test/CodeGen/AArch64/sve-ptest.ll new file mode 100644 index 000000000000..4c4d8b2ba8f4 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ptest.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s + + +; Ensure that the inactive lanes of p1 aren't zeroed, since the FP compare should do that for free. + +define i32 @fcmpeq_nxv4f32( %pg, %a, %b) { +; CHECK-LABEL: fcmpeq_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.fcmpeq.nxv4f32( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +define i32 @fcmpne_nxv4f32( %pg, %a, %b) { +; CHECK-LABEL: fcmpne_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b +; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.fcmpne.nxv4f32( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +define i32 @fcmpge_nxv4f32( %pg, %a, %b) { +; CHECK-LABEL: fcmpge_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b +; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.fcmpge.nxv4f32( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +define i32 @fcmpgt_nxv4f32( %pg, %a, %b) { +; CHECK-LABEL: fcmpgt_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.fcmpgt.nxv4f32( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +define i32 @fcmpuo_nxv4f32( %pg, %a, %b) { +; CHECK-LABEL: fcmpuo_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: and p1.b, p0/z, p0.b, p1.b +; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.fcmpuo.nxv4f32( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + +declare @llvm.aarch64.sve.fcmpeq.nxv4f32(, , ) +declare @llvm.aarch64.sve.fcmpne.nxv4f32(, , ) +declare @llvm.aarch64.sve.fcmpge.nxv4f32(, , ) +declare @llvm.aarch64.sve.fcmpgt.nxv4f32(, , ) +declare @llvm.aarch64.sve.fcmpuo.nxv4f32(, , ) + +declare @llvm.aarch64.sve.ptrue.nxv4i1(i32) + +declare i1 @llvm.aarch64.sve.ptest.any.nxv4i1(, ) + +declare @llvm.aarch64.sve.convert.to.svbool.nxv8i1() +declare @llvm.aarch64.sve.convert.to.svbool.nxv4i1() +declare @llvm.aarch64.sve.convert.from.svbool.nxv8i1() +declare @llvm.aarch64.sve.convert.from.svbool.nxv4i1()