forked from OSchip/llvm-project
[X86][AVX512] Improve support for signed saturation truncation stores
Matches what we already manage for unsigned saturation truncation stores. Differential Revision: https://reviews.llvm.org/D43629 (llvm-svn: 326372)
This commit is contained in:
parent
3fffe1f02d
commit
72b86586b0
|
@ -34349,6 +34349,20 @@ static SDValue detectSSatPattern(SDValue In, EVT VT, bool MatchPackUS = false) {
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
/// Detect a pattern of truncation with signed saturation.
|
||||
/// The types should allow the use of a VPMOVS* instruction (e.g. VPMOVSQB,
/// VPMOVSDB) on AVX512.
|
||||
/// Return the source value to be truncated or SDValue() if the pattern was not
|
||||
/// matched, if the type of \p In is not legal according to \p TLI, or if
/// isSATValidOnAVX512Subtarget() rejects the source/destination type pair.
|
||||
static SDValue detectAVX512SSatPattern(SDValue In, EVT VT,
|
||||
const X86Subtarget &Subtarget,
|
||||
const TargetLowering &TLI) {
|
||||
// Bail out when the source type is not legal for the target.
if (!TLI.isTypeLegal(In.getValueType()))
|
||||
return SDValue();
|
||||
// Bail out unless the (source type, VT) pair passes the AVX512 saturation
// validity check for this subtarget.
if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
|
||||
return SDValue();
|
||||
// Both checks passed: defer to detectSSatPattern, leaving its MatchPackUS
// parameter at the default (false).
return detectSSatPattern(In, VT);
|
||||
}
|
||||
|
||||
/// Detect a pattern of truncation with saturation:
|
||||
/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
|
||||
/// The types should allow the use of a VPMOVUS* instruction on AVX512.
|
||||
|
@ -34987,6 +35001,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
|
|||
St->getMemOperand()->getFlags());
|
||||
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
if (SDValue Val =
|
||||
detectAVX512SSatPattern(St->getValue(), St->getMemoryVT(), Subtarget,
|
||||
TLI))
|
||||
return EmitTruncSStore(true /* Signed saturation */, St->getChain(),
|
||||
dl, Val, St->getBasePtr(),
|
||||
St->getMemoryVT(), St->getMemOperand(), DAG);
|
||||
if (SDValue Val =
|
||||
detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget,
|
||||
TLI))
|
||||
|
|
|
@ -2042,9 +2042,7 @@ define void @trunc_ssat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
|
|||
;
|
||||
; AVX512-LABEL: trunc_ssat_v8i64_v8i8_store:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vpmovqb %zmm0, (%rdi)
|
||||
; AVX512-NEXT: vpmovsqb %zmm0, (%rdi)
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
%1 = icmp slt <8 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
|
||||
|
@ -2998,9 +2996,7 @@ define void @trunc_ssat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) {
|
|||
;
|
||||
; AVX512VL-LABEL: trunc_ssat_v8i32_v8i8_store:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpmaxsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpmovdb %ymm0, (%rdi)
|
||||
; AVX512VL-NEXT: vpmovsdb %ymm0, (%rdi)
|
||||
; AVX512VL-NEXT: vzeroupper
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
|
@ -3018,9 +3014,7 @@ define void @trunc_ssat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) {
|
|||
;
|
||||
; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i8_store:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpmaxsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rdi)
|
||||
; AVX512BWVL-NEXT: vpmovsdb %ymm0, (%rdi)
|
||||
; AVX512BWVL-NEXT: vzeroupper
|
||||
; AVX512BWVL-NEXT: retq
|
||||
%1 = icmp slt <8 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
|
||||
|
|
Loading…
Reference in New Issue