# Patch summary (leading commentary; everything from the first "diff --git" onward is unchanged).
#
# llvm/lib/Target/AArch64/SVEInstrFormats.td, multiclass sve_int_count_r_x64:
#   * Rewords the existing comment to "combine_op(x, cntp(all_active, p)) ==> inst p, x".
#   * Adds four patterns for "combine_op(x, cntp(p, p)) ==> inst p, x", i.e. the case where
#     both operands of int_aarch64_sve_cntp_oneuse are the same predicate $pred. One pattern
#     per predicate type: nxv16i1 -> NAME#_B, nxv8i1 -> NAME#_H, nxv4i1 -> NAME#_S,
#     nxv2i1 -> NAME#_D.
#
# llvm/test/CodeGen/AArch64/sve-cntp-combine.ll:
#   * Renames the existing tests to cntp_{add,sub}_all_active_* (the former
#     cntp_{add,sub}_all_active_nxv8i1 become *_via_cast).
#   * Adds cntp_{add,sub}_same_active_* tests that call cntp with (%pg, %pg) and expect a
#     single incp / decp on p0.{b,h,s,d}.
#   * Adds one test per direction in which the cntp result is used twice; the expected
#     output keeps a separate "cntp x8, p0, p0.d" instead of folding.
#
# NOTE(review): cntp_sub_same_active_nxv2i1_multiuse computes "sub i64 %1, %x" (count minus x)
#   and names the result %add, whereas the positive decp tests compute "sub i64 %x, %1".
#   With the operands reversed the pattern "combine_op GPR64:$Rn, (cntp ...)" may not be a
#   candidate even with a single use -- confirm this test really exercises the one-use
#   restriction. It mirrors the pre-existing *_all_active_nxv2i1_multiuse test.
# NOTE(review): the add-side twice-used test is suffixed "_oneuse" while the sub-side one is
#   suffixed "_multiuse"; both use the count twice. Consider aligning the names.
# NOTE(review): this copy of the patch appears to have lost its angle-bracket payloads
#   (e.g. "!cast(NAME # _B)", "cntp.nxv16i1( %pg, %pg)") and its line breaks; presumably an
#   extraction artifact -- verify against the original commit before applying.
#
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 9ba623acecc1..9febe7add771 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -771,7 +771,7 @@ multiclass sve_int_count_r_x64 opc, string asm, def : Pat<(i64 (op GPR64:$Rn, (nxv2i1 PPRAny:$Pg))), (!cast(NAME # _D) PPRAny:$Pg, $Rn)>; - // Combine cntp with combine_op + // combine_op(x, cntp(all_active, p)) ==> inst p, x def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv16i1 (SVEAllActive)), (nxv16i1 PPRAny:$pred)))), (!cast(NAME # _B) PPRAny:$pred, $Rn)>; def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv8i1 (SVEAllActive)), (nxv8i1 PPRAny:$pred)))), @@ -780,6 +780,16 @@ multiclass sve_int_count_r_x64 opc, string asm, (!cast(NAME # _S) PPRAny:$pred, $Rn)>; def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv2i1 (SVEAllActive)), (nxv2i1 PPRAny:$pred)))), (!cast(NAME # _D) PPRAny:$pred, $Rn)>; + + // combine_op(x, cntp(p, p)) ==> inst p, x + def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv16i1 PPRAny:$pred), (nxv16i1 PPRAny:$pred)))), + (!cast(NAME # _B) PPRAny:$pred, $Rn)>; + def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv8i1 PPRAny:$pred), (nxv8i1 PPRAny:$pred)))), + (!cast(NAME # _H) PPRAny:$pred, $Rn)>; + def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv4i1 PPRAny:$pred), (nxv4i1 PPRAny:$pred)))), + (!cast(NAME # _S) PPRAny:$pred, $Rn)>; + def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv2i1 PPRAny:$pred), (nxv2i1 PPRAny:$pred)))), + (!cast(NAME # _D) PPRAny:$pred, $Rn)>; } class sve_int_count_v sz8_64, bits<5> opc, string asm, diff --git a/llvm/test/CodeGen/AArch64/sve-cntp-combine.ll b/llvm/test/CodeGen/AArch64/sve-cntp-combine.ll index 3539264662eb..eb5122b51f22 100644 --- a/llvm/test/CodeGen/AArch64/sve-cntp-combine.ll +++ 
b/llvm/test/CodeGen/AArch64/sve-cntp-combine.ll @@ -5,8 +5,8 @@ target triple = "aarch64-unknown-linux-gnu" ; INCP -define i64 @cntp_add_nxv16i1(i64 %x, %pg) #0 { -; CHECK-LABEL: cntp_add_nxv16i1: +define i64 @cntp_add_all_active_nxv16i1(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_add_all_active_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: incp x0, p0.b ; CHECK-NEXT: ret @@ -16,8 +16,8 @@ define i64 @cntp_add_nxv16i1(i64 %x, %pg) #0 { ret i64 %add } -define i64 @cntp_add_nxv8i1(i64 %x, %pg) #0 { -; CHECK-LABEL: cntp_add_nxv8i1: +define i64 @cntp_add_all_active_nxv8i1(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_add_all_active_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: incp x0, p0.h ; CHECK-NEXT: ret @@ -27,8 +27,8 @@ define i64 @cntp_add_nxv8i1(i64 %x, %pg) #0 { ret i64 %add } -define i64 @cntp_add_nxv4i1(i64 %x, %pg) #0 { -; CHECK-LABEL: cntp_add_nxv4i1: +define i64 @cntp_add_all_active_nxv4i1(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_add_all_active_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: incp x0, p0.s ; CHECK-NEXT: ret @@ -38,8 +38,8 @@ define i64 @cntp_add_nxv4i1(i64 %x, %pg) #0 { ret i64 %add } -define i64 @cntp_add_nxv2i1(i64 %x, %pg) #0 { -; CHECK-LABEL: cntp_add_nxv2i1: +define i64 @cntp_add_all_active_nxv2i1(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_add_all_active_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: incp x0, p0.d ; CHECK-NEXT: ret @@ -49,8 +49,8 @@ define i64 @cntp_add_nxv2i1(i64 %x, %pg) #0 { ret i64 %add } -define i64 @cntp_add_all_active_nxv8i1(i64 %x, %pg) #0 { -; CHECK-LABEL: cntp_add_all_active_nxv8i1: +define i64 @cntp_add_all_active_nxv8i1_via_cast(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_add_all_active_nxv8i1_via_cast: ; CHECK: // %bb.0: ; CHECK-NEXT: incp x0, p0.h ; CHECK-NEXT: ret @@ -61,8 +61,8 @@ define i64 @cntp_add_all_active_nxv8i1(i64 %x, %pg) #0 { ret i64 %add } -define i64 @cntp_add_nxv2i1_oneuse(i64 %x, %pg) #0 { -; CHECK-LABEL: cntp_add_nxv2i1_oneuse: +define i64 @cntp_add_all_active_nxv2i1_oneuse(i64 %x, %pg) #0 { +; CHECK-LABEL: 
cntp_add_all_active_nxv2i1_oneuse: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: cntp x8, p1, p0.d @@ -77,10 +77,64 @@ define i64 @cntp_add_nxv2i1_oneuse(i64 %x, %pg) #0 { ret i64 %res } +define i64 @cntp_add_same_active_nxv16i1(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_add_same_active_nxv16i1: +; CHECK: // %bb.0: +; CHECK-NEXT: incp x0, p0.b +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1( %pg, %pg) + %add = add i64 %1, %x + ret i64 %add +} + +define i64 @cntp_add_same_active_nxv8i1(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_add_same_active_nxv8i1: +; CHECK: // %bb.0: +; CHECK-NEXT: incp x0, p0.h +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1( %pg, %pg) + %add = add i64 %1, %x + ret i64 %add +} + +define i64 @cntp_add_same_active_nxv4i1(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_add_same_active_nxv4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: incp x0, p0.s +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1( %pg, %pg) + %add = add i64 %1, %x + ret i64 %add +} + +define i64 @cntp_add_same_active_nxv2i1(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_add_same_active_nxv2i1: +; CHECK: // %bb.0: +; CHECK-NEXT: incp x0, p0.d +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1( %pg, %pg) + %add = add i64 %1, %x + ret i64 %add +} + +define i64 @cntp_add_same_active_nxv2i1_oneuse(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_add_same_active_nxv2i1_oneuse: +; CHECK: // %bb.0: +; CHECK-NEXT: cntp x8, p0, p0.d +; CHECK-NEXT: add x9, x8, x0 +; CHECK-NEXT: madd x0, x8, x0, x9 +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1( %pg, %pg) + %add = add i64 %1, %x + %mul = mul i64 %1, %x + %res = add i64 %add, %mul + ret i64 %res +} + ; DECP -define i64 @cntp_sub_nxv16i1(i64 %x, %pg) #0 { -; CHECK-LABEL: cntp_sub_nxv16i1: +define i64 @cntp_sub_all_active_nxv16i1(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_sub_all_active_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: decp x0, p0.b ; CHECK-NEXT: ret @@ -90,8 
+144,8 @@ define i64 @cntp_sub_nxv16i1(i64 %x, %pg) #0 { ret i64 %sub } -define i64 @cntp_sub_nxv8i1(i64 %x, %pg) #0 { -; CHECK-LABEL: cntp_sub_nxv8i1: +define i64 @cntp_sub_all_active_nxv8i1(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_sub_all_active_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: decp x0, p0.h ; CHECK-NEXT: ret @@ -101,8 +155,8 @@ define i64 @cntp_sub_nxv8i1(i64 %x, %pg) #0 { ret i64 %sub } -define i64 @cntp_sub_nxv4i1(i64 %x, %pg) #0 { -; CHECK-LABEL: cntp_sub_nxv4i1: +define i64 @cntp_sub_all_active_nxv4i1(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_sub_all_active_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: decp x0, p0.s ; CHECK-NEXT: ret @@ -112,8 +166,8 @@ define i64 @cntp_sub_nxv4i1(i64 %x, %pg) #0 { ret i64 %sub } -define i64 @cntp_sub_nxv2i1(i64 %x, %pg) #0 { -; CHECK-LABEL: cntp_sub_nxv2i1: +define i64 @cntp_sub_all_active_nxv2i1(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_sub_all_active_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: decp x0, p0.d ; CHECK-NEXT: ret @@ -123,8 +177,8 @@ define i64 @cntp_sub_nxv2i1(i64 %x, %pg) #0 { ret i64 %sub } -define i64 @cntp_sub_all_active_nxv8i1(i64 %x, %pg) #0 { -; CHECK-LABEL: cntp_sub_all_active_nxv8i1: +define i64 @cntp_sub_all_active_nxv8i1_via_cast(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_sub_all_active_nxv8i1_via_cast: ; CHECK: // %bb.0: ; CHECK-NEXT: decp x0, p0.h ; CHECK-NEXT: ret @@ -135,8 +189,8 @@ define i64 @cntp_sub_all_active_nxv8i1(i64 %x, %pg) #0 { ret i64 %sub } -define i64 @cntp_sub_nxv2i1_multiuse(i64 %x, %pg) #0 { -; CHECK-LABEL: cntp_sub_nxv2i1_multiuse: +define i64 @cntp_sub_all_active_nxv2i1_multiuse(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_sub_all_active_nxv2i1_multiuse: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: cntp x8, p1, p0.d @@ -151,6 +205,59 @@ define i64 @cntp_sub_nxv2i1_multiuse(i64 %x, %pg) #0 { ret i64 %res } +define i64 @cntp_sub_same_active_nxv16i1(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_sub_same_active_nxv16i1: +; CHECK: // %bb.0: +; CHECK-NEXT: decp x0, p0.b +; CHECK-NEXT: ret + %1 
= tail call i64 @llvm.aarch64.sve.cntp.nxv16i1( %pg, %pg) + %sub = sub i64 %x, %1 + ret i64 %sub +} + +define i64 @cntp_sub_same_active_nxv8i1(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_sub_same_active_nxv8i1: +; CHECK: // %bb.0: +; CHECK-NEXT: decp x0, p0.h +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1( %pg, %pg) + %sub = sub i64 %x, %1 + ret i64 %sub +} + +define i64 @cntp_sub_same_active_nxv4i1(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_sub_same_active_nxv4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: decp x0, p0.s +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1( %pg, %pg) + %sub = sub i64 %x, %1 + ret i64 %sub +} + +define i64 @cntp_sub_same_active_nxv2i1(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_sub_same_active_nxv2i1: +; CHECK: // %bb.0: +; CHECK-NEXT: decp x0, p0.d +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1( %pg, %pg) + %sub = sub i64 %x, %1 + ret i64 %sub +} + +define i64 @cntp_sub_same_active_nxv2i1_multiuse(i64 %x, %pg) #0 { +; CHECK-LABEL: cntp_sub_same_active_nxv2i1_multiuse: +; CHECK: // %bb.0: +; CHECK-NEXT: cntp x8, p0, p0.d +; CHECK-NEXT: sub x9, x8, x0 +; CHECK-NEXT: madd x0, x8, x0, x9 +; CHECK-NEXT: ret + %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1( %pg, %pg) + %add = sub i64 %1, %x + %mul = mul i64 %1, %x + %res = add i64 %add, %mul + ret i64 %res +} declare @llvm.aarch64.sve.convert.from.svbool.nxv8i1() declare @llvm.aarch64.sve.convert.from.svbool.nxv4i1()