[VP] Add widening for VP_STRIDED_LOAD and VP_STRIDED_STORE

Reviewed By: frasercrmck, craig.topper

Differential Revision: https://reviews.llvm.org/D121114
commit 74940d2668 (parent ff1d471964)
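For context, a minimal IR sketch of the kind of operation this change lets the widening legalizer handle; it mirrors the strided_vpload_v3f64 test added below, and the function name here is illustrative only. The illegal <3 x double> result type is widened to the next legal vector type (typically the next power-of-two element count, <4 x double> in these RVV tests), while the mask is widened alongside it and the EVL operand is carried through unchanged:

declare <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0f64.i32(double*, i32, <3 x i1>, i32)

define <3 x double> @widen_strided_load_sketch(double* %ptr, i32 %stride, <3 x i1> %m, i32 %evl) {
  ; <3 x double> is not a legal result type, so type legalization widens the
  ; VP_STRIDED_LOAD node; lanes past the original element count stay inactive
  ; because the EVL and (widened) mask operands are preserved.
  %v = call <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0f64.i32(double* %ptr, i32 %stride, <3 x i1> %m, i32 %evl)
  ret <3 x double> %v
}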
@@ -947,6 +947,7 @@ private:
   SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
   SDValue WidenVecRes_LOAD(SDNode* N);
   SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
+  SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
   SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
   SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
   SDValue WidenVecRes_VP_GATHER(VPGatherSDNode* N);
@@ -984,6 +985,7 @@ private:
   SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue WidenVecOp_STORE(SDNode* N);
   SDValue WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo);
+  SDValue WidenVecOp_VP_STRIDED_STORE(SDNode *N, unsigned OpNo);
   SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
   SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
   SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
@@ -3687,6 +3687,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::VP_LOAD:
     Res = WidenVecRes_VP_LOAD(cast<VPLoadSDNode>(N));
     break;
+  case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+    Res = WidenVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N));
+    break;
   case ISD::MLOAD:
     Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N));
     break;
@@ -4919,6 +4922,33 @@ SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD(VPLoadSDNode *N) {
   return Res;
 }
 
+SDValue DAGTypeLegalizer::WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N) {
+  SDLoc DL(N);
+
+  // The mask should be widened as well
+  SDValue Mask = N->getMask();
+  assert(getTypeAction(Mask.getValueType()) ==
+             TargetLowering::TypeWidenVector &&
+         "Unable to widen VP strided load");
+  Mask = GetWidenedVector(Mask);
+
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  assert(Mask.getValueType().getVectorElementCount() ==
+             WidenVT.getVectorElementCount() &&
+         "Data and mask vectors should have the same number of elements");
+
+  SDValue Res = DAG.getStridedLoadVP(
+      N->getAddressingMode(), N->getExtensionType(), WidenVT, DL, N->getChain(),
+      N->getBasePtr(), N->getOffset(), N->getStride(), Mask,
+      N->getVectorLength(), N->getMemoryVT(), N->getMemOperand(),
+      N->isExpandingLoad());
+
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+  return Res;
+}
+
 SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
 
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
@@ -5436,6 +5466,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
   case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
   case ISD::STORE: Res = WidenVecOp_STORE(N); break;
   case ISD::VP_STORE: Res = WidenVecOp_VP_STORE(N, OpNo); break;
+  case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+    Res = WidenVecOp_VP_STRIDED_STORE(N, OpNo);
+    break;
   case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
   case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break;
   case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
@@ -5914,6 +5947,38 @@ SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) {
                         ST->isCompressingStore());
 }
 
+SDValue DAGTypeLegalizer::WidenVecOp_VP_STRIDED_STORE(SDNode *N,
+                                                      unsigned OpNo) {
+  assert((OpNo == 1 || OpNo == 4) &&
+         "Can widen only data or mask operand of vp_strided_store");
+  VPStridedStoreSDNode *SST = cast<VPStridedStoreSDNode>(N);
+  SDValue Mask = SST->getMask();
+  SDValue StVal = SST->getValue();
+  SDLoc DL(N);
+
+  if (OpNo == 1)
+    assert(getTypeAction(Mask.getValueType()) ==
+               TargetLowering::TypeWidenVector &&
+           "Unable to widen VP strided store");
+  else
+    assert(getTypeAction(StVal.getValueType()) ==
+               TargetLowering::TypeWidenVector &&
+           "Unable to widen VP strided store");
+
+  StVal = GetWidenedVector(StVal);
+  Mask = GetWidenedVector(Mask);
+
+  assert(StVal.getValueType().getVectorElementCount() ==
+             Mask.getValueType().getVectorElementCount() &&
+         "Data and mask vectors should have the same number of elements");
+
+  return DAG.getStridedStoreVP(
+      SST->getChain(), DL, StVal, SST->getBasePtr(), SST->getOffset(),
+      SST->getStride(), Mask, SST->getVectorLength(), SST->getMemoryVT(),
+      SST->getMemOperand(), SST->getAddressingMode(), SST->isTruncatingStore(),
+      SST->isCompressingStore());
+}
+
 SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
   assert((OpNo == 1 || OpNo == 3) &&
          "Can widen only data or mask operand of mstore");
@@ -563,3 +563,40 @@ define <8 x double> @strided_vpload_v8f64(double* %ptr, i32 signext %stride, <8
   %load = call <8 x double> @llvm.experimental.vp.strided.load.v8f64.p0f64.i32(double* %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
   ret <8 x double> %load
 }
+
+; Widening
+define <3 x double> @strided_vpload_v3f64(double* %ptr, i32 signext %stride, <3 x i1> %mask, i32 zeroext %evl) {
+; CHECK-RV32-LABEL: strided_vpload_v3f64:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-RV32-NEXT:    vlse64.v v8, (a0), a1, v0.t
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: strided_vpload_v3f64:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-RV64-NEXT:    vlse64.v v8, (a0), a1, v0.t
+; CHECK-RV64-NEXT:    ret
+  %v = call <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0f64.i32(double* %ptr, i32 %stride, <3 x i1> %mask, i32 %evl)
+  ret <3 x double> %v
+}
+
+define <3 x double> @strided_vpload_v3f64_allones_mask(double* %ptr, i32 signext %stride, i32 zeroext %evl) {
+; CHECK-RV32-LABEL: strided_vpload_v3f64_allones_mask:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-RV32-NEXT:    vlse64.v v8, (a0), a1
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: strided_vpload_v3f64_allones_mask:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-RV64-NEXT:    vlse64.v v8, (a0), a1
+; CHECK-RV64-NEXT:    ret
+  %one = insertelement <3 x i1> poison, i1 true, i32 0
+  %allones = shufflevector <3 x i1> %one, <3 x i1> poison, <3 x i32> zeroinitializer
+  %v = call <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0f64.i32(double* %ptr, i32 %stride, <3 x i1> %allones, i32 %evl)
+  ret <3 x double> %v
+}
+
+declare <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0f64.i32(double*, i32, <3 x i1>, i32)
@@ -455,3 +455,40 @@ define void @strided_vpstore_v2i8_allones_mask(<2 x i8> %val, i8* %ptr, i32 sign
   call void @llvm.experimental.vp.strided.store.v2i8.p0i8.i32(<2 x i8> %val, i8* %ptr, i32 %stride, <2 x i1> %b, i32 %evl)
   ret void
 }
+
+; Widening
+define void @strided_vpstore_v3f32(<3 x float> %v, float *%ptr, i32 signext %stride, <3 x i1> %mask, i32 zeroext %evl) {
+; CHECK-RV32-LABEL: strided_vpstore_v3f32:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-RV32-NEXT:    vsse32.v v8, (a0), a1, v0.t
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: strided_vpstore_v3f32:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-RV64-NEXT:    vsse32.v v8, (a0), a1, v0.t
+; CHECK-RV64-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.v3f32.p0f32.i32(<3 x float> %v, float* %ptr, i32 %stride, <3 x i1> %mask, i32 %evl)
+  ret void
+}
+
+define void @strided_vpstore_v3f32_allones_mask(<3 x float> %v, float *%ptr, i32 signext %stride, i32 zeroext %evl) {
+; CHECK-RV32-LABEL: strided_vpstore_v3f32_allones_mask:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-RV32-NEXT:    vsse32.v v8, (a0), a1
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: strided_vpstore_v3f32_allones_mask:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-RV64-NEXT:    vsse32.v v8, (a0), a1
+; CHECK-RV64-NEXT:    ret
+  %one = insertelement <3 x i1> poison, i1 true, i32 0
+  %allones = shufflevector <3 x i1> %one, <3 x i1> poison, <3 x i32> zeroinitializer
+  call void @llvm.experimental.vp.strided.store.v3f32.p0f32.i32(<3 x float> %v, float* %ptr, i32 %stride, <3 x i1> %allones, i32 %evl)
+  ret void
+}
+
+declare void @llvm.experimental.vp.strided.store.v3f32.p0f32.i32(<3 x float>, float* , i32, <3 x i1>, i32)
@@ -723,3 +723,40 @@ define <vscale x 8 x double> @strided_vpload_nxv8f64(double* %ptr, i32 signext %
   %load = call <vscale x 8 x double> @llvm.experimental.vp.strided.load.nxv8f64.p0f64.i32(double* %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %load
 }
+
+; Widening
+define <vscale x 3 x double> @strided_vpload_nxv3f64(double* %ptr, i32 signext %stride, <vscale x 3 x i1> %mask, i32 zeroext %evl) {
+; CHECK-RV32-LABEL: strided_vpload_nxv3f64:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-RV32-NEXT:    vlse64.v v8, (a0), a1, v0.t
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: strided_vpload_nxv3f64:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-RV64-NEXT:    vlse64.v v8, (a0), a1, v0.t
+; CHECK-RV64-NEXT:    ret
+  %v = call <vscale x 3 x double> @llvm.experimental.vp.strided.load.nxv3f64.p0f64.i32(double* %ptr, i32 %stride, <vscale x 3 x i1> %mask, i32 %evl)
+  ret <vscale x 3 x double> %v
+}
+
+define <vscale x 3 x double> @strided_vpload_nxv3f64_allones_mask(double* %ptr, i32 signext %stride, i32 zeroext %evl) {
+; CHECK-RV32-LABEL: strided_vpload_nxv3f64_allones_mask:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-RV32-NEXT:    vlse64.v v8, (a0), a1
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: strided_vpload_nxv3f64_allones_mask:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-RV64-NEXT:    vlse64.v v8, (a0), a1
+; CHECK-RV64-NEXT:    ret
+  %one = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+  %allones = shufflevector <vscale x 3 x i1> %one, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x double> @llvm.experimental.vp.strided.load.nxv3f64.p0f64.i32(double* %ptr, i32 %stride, <vscale x 3 x i1> %allones, i32 %evl)
+  ret <vscale x 3 x double> %v
+}
+
+declare <vscale x 3 x double> @llvm.experimental.vp.strided.load.nxv3f64.p0f64.i32(double*, i32, <vscale x 3 x i1>, i32)
@@ -579,3 +579,40 @@ define void @strided_vpstore_nxv1i8_allones_mask(<vscale x 1 x i8> %val, i8* %pt
   call void @llvm.experimental.vp.strided.store.nxv1i8.p0i8.i32(<vscale x 1 x i8> %val, i8* %ptr, i32 %strided, <vscale x 1 x i1> %b, i32 %evl)
   ret void
 }
+
+; Widening
+define void @strided_vpstore_nxv3f32(<vscale x 3 x float> %v, float *%ptr, i32 signext %stride, <vscale x 3 x i1> %mask, i32 zeroext %evl) {
+; CHECK-RV32-LABEL: strided_vpstore_nxv3f32:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e32, m2, ta, mu
+; CHECK-RV32-NEXT:    vsse32.v v8, (a0), a1, v0.t
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: strided_vpstore_nxv3f32:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    vsetvli zero, a2, e32, m2, ta, mu
+; CHECK-RV64-NEXT:    vsse32.v v8, (a0), a1, v0.t
+; CHECK-RV64-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.nxv3f32.p0f32.i32(<vscale x 3 x float> %v, float* %ptr, i32 %stride, <vscale x 3 x i1> %mask, i32 %evl)
+  ret void
+}
+
+define void @strided_vpstore_nxv3f32_allones_mask(<vscale x 3 x float> %v, float *%ptr, i32 signext %stride, i32 zeroext %evl) {
+; CHECK-RV32-LABEL: strided_vpstore_nxv3f32_allones_mask:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e32, m2, ta, mu
+; CHECK-RV32-NEXT:    vsse32.v v8, (a0), a1
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: strided_vpstore_nxv3f32_allones_mask:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    vsetvli zero, a2, e32, m2, ta, mu
+; CHECK-RV64-NEXT:    vsse32.v v8, (a0), a1
+; CHECK-RV64-NEXT:    ret
+  %one = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+  %allones = shufflevector <vscale x 3 x i1> %one, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+  call void @llvm.experimental.vp.strided.store.nxv3f32.p0f32.i32(<vscale x 3 x float> %v, float* %ptr, i32 %stride, <vscale x 3 x i1> %allones, i32 %evl)
+  ret void
+}
+
+declare void @llvm.experimental.vp.strided.store.nxv3f32.p0f32.i32(<vscale x 3 x float>, float* , i32, <vscale x 3 x i1>, i32)