[PowerPC] Implement low-order Vector Multiply, Modulus and Divide Instructions

This patch implements the low-order vector multiply, divide and modulo
instructions available on Power10.

The patch legalizes the ISD node MUL for the v2i64 vector type, and the ISD
nodes UDIV, SDIV, UREM and SREM for the v2i64 and v4i32 vector types, in order
to utilize the following instructions:
- Vector Multiply Low Doubleword: vmulld
- Vector Modulus Word/Doubleword: vmodsw, vmoduw, vmodsd, vmodud
- Vector Divide Word/Doubleword: vdivsw, vdivsd, vdivuw, vdivud

Differential Revision: https://reviews.llvm.org/D82510
This commit is contained in:
Amy Kwan 2020-07-23 13:12:45 -05:00
parent 38c71b7c85
commit 1dc1a3fb0c
5 changed files with 207 additions and 9 deletions

View File

@ -809,6 +809,18 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
else
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
if (Subtarget.isISA3_1()) {
setOperationAction(ISD::MUL, MVT::v2i64, Legal);
setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
setOperationAction(ISD::UREM, MVT::v2i64, Legal);
setOperationAction(ISD::SREM, MVT::v2i64, Legal);
setOperationAction(ISD::UREM, MVT::v4i32, Legal);
setOperationAction(ISD::SREM, MVT::v4i32, Legal);
}
setOperationAction(ISD::MUL, MVT::v8i16, Legal);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);

View File

@ -973,7 +973,8 @@ let Predicates = [IsISA3_1] in {
[(set v16i8:$vD,
(int_ppc_altivec_vclrrb v16i8:$vA, i32:$rB))]>;
def VMULLD : VXForm_1<457, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmulld $vD, $vA, $vB", IIC_VecGeneral, []>;
"vmulld $vD, $vA, $vB", IIC_VecGeneral,
[(set v2i64:$vD, (mul v2i64:$vA, v2i64:$vB))]>;
def VMULHSW : VXForm_1<905, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmulhsw $vD, $vA, $vB", IIC_VecGeneral, []>;
def VMULHUW : VXForm_1<649, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
@ -983,21 +984,29 @@ let Predicates = [IsISA3_1] in {
def VMULHUD : VXForm_1<713, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmulhud $vD, $vA, $vB", IIC_VecGeneral, []>;
def VMODSW : VXForm_1<1931, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmodsw $vD, $vA, $vB", IIC_VecGeneral, []>;
"vmodsw $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (srem v4i32:$vA, v4i32:$vB))]>;
def VMODUW : VXForm_1<1675, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmoduw $vD, $vA, $vB", IIC_VecGeneral, []>;
"vmoduw $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (urem v4i32:$vA, v4i32:$vB))]>;
def VMODSD : VXForm_1<1995, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmodsd $vD, $vA, $vB", IIC_VecGeneral, []>;
"vmodsd $vD, $vA, $vB", IIC_VecGeneral,
[(set v2i64:$vD, (srem v2i64:$vA, v2i64:$vB))]>;
def VMODUD : VXForm_1<1739, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmodud $vD, $vA, $vB", IIC_VecGeneral, []>;
"vmodud $vD, $vA, $vB", IIC_VecGeneral,
[(set v2i64:$vD, (urem v2i64:$vA, v2i64:$vB))]>;
def VDIVSW : VXForm_1<395, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vdivsw $vD, $vA, $vB", IIC_VecGeneral, []>;
"vdivsw $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (sdiv v4i32:$vA, v4i32:$vB))]>;
def VDIVUW : VXForm_1<139, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vdivuw $vD, $vA, $vB", IIC_VecGeneral, []>;
"vdivuw $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (udiv v4i32:$vA, v4i32:$vB))]>;
def VDIVSD : VXForm_1<459, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vdivsd $vD, $vA, $vB", IIC_VecGeneral, []>;
"vdivsd $vD, $vA, $vB", IIC_VecGeneral,
[(set v2i64:$vD, (sdiv v2i64:$vA, v2i64:$vB))]>;
def VDIVUD : VXForm_1<203, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vdivud $vD, $vA, $vB", IIC_VecGeneral, []>;
"vdivud $vD, $vA, $vB", IIC_VecGeneral,
[(set v2i64:$vD, (udiv v2i64:$vA, v2i64:$vB))]>;
def VDIVESW : VXForm_1<907, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vdivesw $vD, $vA, $vB", IIC_VecGeneral, []>;
def VDIVEUW : VXForm_1<651, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),

View File

@ -0,0 +1,51 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s
; This test case aims to test the low order vector divide instructions on
; Power10, which operate on signed and unsigned words and doublewords.
; Unsigned division of two <2 x i64> vectors; the assertions below expect it to
; be selected as a single vdivud instruction.
define <2 x i64> @test_vdivud(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vdivud:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vdivud v2, v2, v3
; CHECK-NEXT: blr
entry:
%div = udiv <2 x i64> %a, %b
ret <2 x i64> %div
}
; Signed division of two <2 x i64> vectors; the assertions below expect it to
; be selected as a single vdivsd instruction.
define <2 x i64> @test_vdivsd(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vdivsd:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vdivsd v2, v2, v3
; CHECK-NEXT: blr
entry:
%div = sdiv <2 x i64> %a, %b
ret <2 x i64> %div
}
; Unsigned division of two <4 x i32> vectors; the assertions below expect it to
; be selected as a single vdivuw instruction.
define <4 x i32> @test_vdivuw(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vdivuw:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vdivuw v2, v2, v3
; CHECK-NEXT: blr
entry:
%div = udiv <4 x i32> %a, %b
ret <4 x i32> %div
}
; Signed division of two <4 x i32> vectors; the assertions below expect it to
; be selected as a single vdivsw instruction.
define <4 x i32> @test_vdivsw(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vdivsw:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vdivsw v2, v2, v3
; CHECK-NEXT: blr
entry:
%div = sdiv <4 x i32> %a, %b
ret <4 x i32> %div
}

View File

@ -0,0 +1,107 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s
; This test case aims to test the vector modulo instructions on Power10.
; The vector modulo instructions operate on signed and unsigned words
; and doublewords.
; Unsigned remainder of two <2 x i64> vectors; the assertions below expect it
; to be selected as a single vmodud instruction.
define <2 x i64> @test_vmodud(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmodud:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmodud v2, v2, v3
; CHECK-NEXT: blr
entry:
%rem = urem <2 x i64> %a, %b
ret <2 x i64> %rem
}
; Signed remainder of two <2 x i64> vectors; the assertions below expect it to
; be selected as a single vmodsd instruction.
define <2 x i64> @test_vmodsd(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmodsd:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmodsd v2, v2, v3
; CHECK-NEXT: blr
entry:
%rem = srem <2 x i64> %a, %b
ret <2 x i64> %rem
}
; Unsigned remainder of two <4 x i32> vectors; the assertions below expect it
; to be selected as a single vmoduw instruction.
define <4 x i32> @test_vmoduw(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmoduw:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmoduw v2, v2, v3
; CHECK-NEXT: blr
entry:
%rem = urem <4 x i32> %a, %b
ret <4 x i32> %rem
}
; Signed remainder of two <4 x i32> vectors; the assertions below expect it to
; be selected as a single vmodsw instruction.
define <4 x i32> @test_vmodsw(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmodsw:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmodsw v2, v2, v3
; CHECK-NEXT: blr
entry:
%rem = srem <4 x i32> %a, %b
ret <4 x i32> %rem
}
; Unsigned remainder and unsigned quotient of the same <2 x i64> operands,
; added together. The assertions below expect a separate vmodud and vdivud on
; the same inputs, followed by vaddudm.
define <2 x i64> @test_vmodud_with_div(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmodud_with_div:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmodud v4, v2, v3
; CHECK-NEXT: vdivud v2, v2, v3
; CHECK-NEXT: vaddudm v2, v4, v2
; CHECK-NEXT: blr
entry:
%rem = urem <2 x i64> %a, %b
%div = udiv <2 x i64> %a, %b
%add = add <2 x i64> %rem, %div
ret <2 x i64> %add
}
; Signed remainder and signed quotient of the same <2 x i64> operands, added
; together. The assertions below expect a separate vmodsd and vdivsd on the
; same inputs, followed by vaddudm.
define <2 x i64> @test_vmodsd_with_div(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmodsd_with_div:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmodsd v4, v2, v3
; CHECK-NEXT: vdivsd v2, v2, v3
; CHECK-NEXT: vaddudm v2, v4, v2
; CHECK-NEXT: blr
entry:
%rem = srem <2 x i64> %a, %b
%div = sdiv <2 x i64> %a, %b
%add = add <2 x i64> %rem, %div
ret <2 x i64> %add
}
; Unsigned remainder and unsigned quotient of the same <4 x i32> operands,
; added together. The assertions below expect a separate vmoduw and vdivuw on
; the same inputs, followed by vadduwm.
define <4 x i32> @test_vmoduw_with_div(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmoduw_with_div:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmoduw v4, v2, v3
; CHECK-NEXT: vdivuw v2, v2, v3
; CHECK-NEXT: vadduwm v2, v4, v2
; CHECK-NEXT: blr
entry:
%rem = urem <4 x i32> %a, %b
%div = udiv <4 x i32> %a, %b
%add = add <4 x i32> %rem, %div
ret <4 x i32> %add
}
; Signed remainder and signed quotient of the same <4 x i32> operands, added
; together. The assertions below expect a separate vmodsw and vdivsw on the
; same inputs, followed by vadduwm.
; NOTE(review): the sibling tests in this file use a "_with_div" suffix; this
; one uses "_div".
define <4 x i32> @test_vmodsw_div(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmodsw_div:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmodsw v4, v2, v3
; CHECK-NEXT: vdivsw v2, v2, v3
; CHECK-NEXT: vadduwm v2, v4, v2
; CHECK-NEXT: blr
entry:
%rem = srem <4 x i32> %a, %b
%div = sdiv <4 x i32> %a, %b
%add = add <4 x i32> %rem, %div
ret <4 x i32> %add
}

View File

@ -0,0 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s
; This test case aims to test the vector multiply instructions on Power10.
; Multiplication of two <2 x i64> vectors, written in the IR with %b as the
; first operand and %a as the second. The assertions below expect a single
; vmulld whose first source register is v3 and second is v2.
define <2 x i64> @test_vmulld(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulld:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmulld v2, v3, v2
; CHECK-NEXT: blr
entry:
%mul = mul <2 x i64> %b, %a
ret <2 x i64> %mul
}