[SLP][X86] Add 32-bit vector stores to help vectorization opportunities
Building on D124284, this patch tags v4i8 and v2i16 vector stores as Custom, enabling SLP to try to vectorize chains of these types that end in a partial-width store (emitted with the SSE MOVD instruction); we already do something similar for 64-bit vector types.

Differential Revision: https://reviews.llvm.org/D127604
commit e961e05d59
parent f22795de68
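To make the intent concrete, here is a minimal IR sketch (function name illustrative; the shape is taken from the updated tests below) of the pattern this change unlocks:

; Sketch: SLP can now emit this whole-vector form in place of four scalar
; i8 load/add/store chains. On SSE2 the <4 x i8> load and the final store
; are each expected to lower to a single 32-bit MOVD.
define void @add4_sketch(ptr noalias %r, ptr noalias %a) {
entry:
  %va = load <4 x i8>, ptr %a, align 1
  %vr = load <4 x i8>, ptr %r, align 1
  %sum = add <4 x i8> %vr, %va
  store <4 x i8> %sum, ptr %r, align 1    ; 32-bit vector store -> MOVD
  ret void
}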
@@ -1122,6 +1122,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::STORE, MVT::v4i16, Custom);
     setOperationAction(ISD::STORE, MVT::v8i8, Custom);
 
+    // Add 32-bit vector stores to help vectorization opportunities.
+    setOperationAction(ISD::STORE, MVT::v2i16, Custom);
+    setOperationAction(ISD::STORE, MVT::v4i8, Custom);
+
     setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
     setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
     setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
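For the second tagged type, a corresponding sketch (again illustrative, not part of the patch): a <2 x i16> store is also 32 bits wide, so it can take the same MOVD path.

define void @add2_i16_sketch(ptr noalias %r, ptr noalias %a) {
entry:
  %va = load <2 x i16>, ptr %a, align 2
  %vr = load <2 x i16>, ptr %r, align 2
  %sum = add <2 x i16> %vr, %va
  store <2 x i16> %sum, ptr %r, align 2   ; also a single 32-bit store
  ret void
}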
@@ -25520,6 +25524,9 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
     return SDValue();
   }
 
+  if (StoreVT.is32BitVector())
+    return SDValue();
+
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   assert(StoreVT.is64BitVector() && "Unexpected VT");
   assert(TLI.getTypeAction(*DAG.getContext(), StoreVT) ==
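The new early-out keeps the assumptions below it intact: everything past this point asserts a 64-bit vector type, so the 32-bit vector stores tagged Custom above bail out here instead of entering the 64-bit widening logic. For reference, a rough sketch of the machine code the 32-bit path is aiming for (illustrative only; exact registers and surrounding instructions will differ):

; movd xmm0, dword ptr [rsi]   ; four i8 (or two i16) lanes packed in 32 bits
; ... <4 x i8> / <2 x i16> arithmetic on xmm0 ...
; movd dword ptr [rdi], xmm0   ; the 32-bit partial store from the summary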
@@ -14,28 +14,10 @@
 define void @add4(ptr noalias nocapture noundef %r, ptr noalias nocapture noundef readonly %a) {
 ; CHECK-LABEL: @add4(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A:%.*]], align 1
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[R:%.*]], align 1
-; CHECK-NEXT: [[ADD:%.*]] = add i8 [[TMP1]], [[TMP0]]
-; CHECK-NEXT: store i8 [[ADD]], ptr [[R]], align 1
-; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 1
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX_1]], align 1
-; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i8, ptr [[R]], i64 1
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX2_1]], align 1
-; CHECK-NEXT: [[ADD_1:%.*]] = add i8 [[TMP3]], [[TMP2]]
-; CHECK-NEXT: store i8 [[ADD_1]], ptr [[ARRAYIDX2_1]], align 1
-; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 2
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX_2]], align 1
-; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i8, ptr [[R]], i64 2
-; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX2_2]], align 1
-; CHECK-NEXT: [[ADD_2:%.*]] = add i8 [[TMP5]], [[TMP4]]
-; CHECK-NEXT: store i8 [[ADD_2]], ptr [[ARRAYIDX2_2]], align 1
-; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 3
-; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX_3]], align 1
-; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i8, ptr [[R]], i64 3
-; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX2_3]], align 1
-; CHECK-NEXT: [[ADD_3:%.*]] = add i8 [[TMP7]], [[TMP6]]
-; CHECK-NEXT: store i8 [[ADD_3]], ptr [[ARRAYIDX2_3]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A:%.*]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[R:%.*]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], [[TMP0]]
+; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[R]], align 1
 ; CHECK-NEXT: ret void
 ;
 entry:
@@ -14,24 +14,9 @@
 define void @and4(ptr noalias nocapture noundef writeonly %dst, ptr noalias nocapture noundef readonly %src) {
 ; CHECK-LABEL: @and4(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[SRC:%.*]], align 1
-; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], -64
-; CHECK-NEXT: store i8 [[TMP1]], ptr [[DST:%.*]], align 1
-; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX_1]], align 1
-; CHECK-NEXT: [[TMP3:%.*]] = and i8 [[TMP2]], -64
-; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
-; CHECK-NEXT: store i8 [[TMP3]], ptr [[ARRAYIDX3_1]], align 1
-; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 2
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX_2]], align 1
-; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[TMP4]], -64
-; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 2
-; CHECK-NEXT: store i8 [[TMP5]], ptr [[ARRAYIDX3_2]], align 1
-; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 3
-; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX_3]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[TMP6]], -64
-; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 3
-; CHECK-NEXT: store i8 [[TMP7]], ptr [[ARRAYIDX3_3]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i8> [[TMP0]], <i8 -64, i8 -64, i8 -64, i8 -64>
+; CHECK-NEXT: store <4 x i8> [[TMP1]], ptr [[DST:%.*]], align 1
 ; CHECK-NEXT: ret void
 ;
 entry:
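(In the and4 checks, the splat constant -64 is 0xC0 as an i8, so each byte keeps only its top two bits; the scalar and vector forms compute exactly the same masks.)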
@@ -14,28 +14,10 @@
 define void @mul4(ptr noalias nocapture noundef %r, ptr noalias nocapture noundef readonly %a) {
 ; CHECK-LABEL: @mul4(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A:%.*]], align 1
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[R:%.*]], align 1
-; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[TMP1]], [[TMP0]]
-; CHECK-NEXT: store i8 [[MUL]], ptr [[R]], align 1
-; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 1
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX_1]], align 1
-; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i8, ptr [[R]], i64 1
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX2_1]], align 1
-; CHECK-NEXT: [[MUL_1:%.*]] = mul i8 [[TMP3]], [[TMP2]]
-; CHECK-NEXT: store i8 [[MUL_1]], ptr [[ARRAYIDX2_1]], align 1
-; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 2
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX_2]], align 1
-; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i8, ptr [[R]], i64 2
-; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX2_2]], align 1
-; CHECK-NEXT: [[MUL_2:%.*]] = mul i8 [[TMP5]], [[TMP4]]
-; CHECK-NEXT: store i8 [[MUL_2]], ptr [[ARRAYIDX2_2]], align 1
-; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 3
-; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX_3]], align 1
-; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i8, ptr [[R]], i64 3
-; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX2_3]], align 1
-; CHECK-NEXT: [[MUL_3:%.*]] = mul i8 [[TMP7]], [[TMP6]]
-; CHECK-NEXT: store i8 [[MUL_3]], ptr [[ARRAYIDX2_3]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A:%.*]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[R:%.*]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP0]]
+; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[R]], align 1
 ; CHECK-NEXT: ret void
 ;
 entry:
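These check lines follow the usual auto-generated pattern (presumably produced with utils/update_test_checks.py); a RUN line of roughly this shape, reconstructed since it is not visible in this excerpt, drives them:

; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes=slp-vectorizer -S | FileCheck %s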