forked from OSchip/llvm-project
Revert "[DAGCombiner] Enable SimplifyDemandedBits vector support for TRUNCATE"
It caused "Vector shift amounts must be in the same as their first arg"
asserts in Chromium builds. See the code review for repro instructions.
> Add DemandedElts support inside the TRUNCATE analysis.
>
> Differential Revision: https://reviews.llvm.org/D56387
This reverts commit cad4275d69.
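For context, the reverted change let SimplifyDemandedBits look through vector TRUNCATE nodes and narrow the operation feeding the truncate. A minimal sketch of the kind of IR pattern such a combine rewrites is shown below; this is an illustrative reduction only (the function name and types are invented for the example), not the Chromium reproducer; see the code review for that.

; Only the low 16 bits of each ashr lane are demanded by the trunc, so
; the combine may try to rebuild the shift at the narrower type.
; Recreating a vector shift whose amount operand has a mismatched type
; is what fires the quoted SelectionDAG assertion.
define <8 x i16> @trunc_of_ashr(<8 x i32> %x) {
  %s = ashr <8 x i32> %x, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %t = trunc <8 x i32> %s to <8 x i16>
  ret <8 x i16> %t
}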
parent b270fd59f0
commit a51226057f
@@ -11952,7 +11952,8 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   }

   // Simplify the operands using demanded-bits information.
-  if (SimplifyDemandedBits(SDValue(N, 0)))
+  if (!VT.isVector() &&
+      SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);

   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)

@@ -1986,8 +1986,7 @@ bool TargetLowering::SimplifyDemandedBits(
     // zero/one bits live out.
     unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
     APInt TruncMask = DemandedBits.zext(OperandBitWidth);
-    if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
-                             Depth + 1))
+    if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
       return true;
     Known = Known.trunc(BitWidth);

@@ -2010,9 +2009,9 @@ bool TargetLowering::SimplifyDemandedBits(
       // undesirable.
       break;

-    const APInt *ShAmtC =
-        TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
-    if (!ShAmtC)
+    SDValue ShAmt = Src.getOperand(1);
+    auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt);
+    if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
       break;
     uint64_t ShVal = ShAmtC->getZExtValue();

@@ -2024,7 +2023,6 @@ bool TargetLowering::SimplifyDemandedBits(
     if (!(HighBits & DemandedBits)) {
       // None of the shifted in bits are needed. Add a truncate of the
       // shift input, then shift it.
-      SDValue ShAmt = Src.getOperand(1);
       if (TLO.LegalTypes())
         ShAmt = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
       SDValue NewTrunc =

@@ -3399,7 +3399,6 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {

 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
   return N->getOpcode() == ISD::SIGN_EXTEND ||
-         N->getOpcode() == ISD::ANY_EXTEND ||
          isExtendedBUILD_VECTOR(N, DAG, true);
 }

@@ -96,7 +96,7 @@ define <8 x i16> @amull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr d0, [x0]
 ; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
 ; CHECK-NEXT: bic v0.8h, #255, lsl #8
 ; CHECK-NEXT: ret
   %tmp1 = load <8 x i8>, <8 x i8>* %A
@@ -113,7 +113,7 @@ define <4 x i32> @amull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr d0, [x0]
 ; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
 ; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
@@ -131,7 +131,7 @@ define <2 x i64> @amull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr d0, [x0]
 ; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
 ; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
@@ -258,7 +258,7 @@ define <8 x i16> @amlal_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) no
 ; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
-; CHECK-NEXT: smlal v0.8h, v1.8b, v2.8b
+; CHECK-NEXT: umlal v0.8h, v1.8b, v2.8b
 ; CHECK-NEXT: bic v0.8h, #255, lsl #8
 ; CHECK-NEXT: ret
   %tmp1 = load <8 x i16>, <8 x i16>* %A
@@ -278,7 +278,7 @@ define <4 x i32> @amlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C)
 ; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
-; CHECK-NEXT: smlal v0.4s, v1.4h, v2.4h
+; CHECK-NEXT: umlal v0.4s, v1.4h, v2.4h
 ; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
@@ -299,7 +299,7 @@ define <2 x i64> @amlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C)
 ; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
-; CHECK-NEXT: smlal v0.2d, v1.2s, v2.2s
+; CHECK-NEXT: umlal v0.2d, v1.2s, v2.2s
 ; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
@@ -428,7 +428,7 @@ define <8 x i16> @amlsl_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) no
 ; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
-; CHECK-NEXT: smlsl v0.8h, v1.8b, v2.8b
+; CHECK-NEXT: umlsl v0.8h, v1.8b, v2.8b
 ; CHECK-NEXT: bic v0.8h, #255, lsl #8
 ; CHECK-NEXT: ret
   %tmp1 = load <8 x i16>, <8 x i16>* %A
@@ -448,7 +448,7 @@ define <4 x i32> @amlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C)
 ; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
-; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.4h
+; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.4h
 ; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
@@ -469,7 +469,7 @@ define <2 x i64> @amlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C)
 ; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
-; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.2s
+; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.2s
 ; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
@@ -586,7 +586,7 @@ define <8 x i16> @amull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
 ; CHECK-LABEL: amull_extvec_v8i8_v8i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: movi v1.8b, #12
-; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
 ; CHECK-NEXT: bic v0.8h, #255, lsl #8
 ; CHECK-NEXT: ret
   %tmp3 = zext <8 x i8> %arg to <8 x i16>
@@ -600,7 +600,7 @@ define <4 x i32> @amull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind {
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #1234
 ; CHECK-NEXT: dup v1.4h, w8
-; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
 ; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
@@ -615,7 +615,7 @@ define <2 x i64> @amull_extvec_v2i32_v2i64(<2 x i32> %arg) nounwind {
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #1234
 ; CHECK-NEXT: dup v1.2s, w8
-; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
 ; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
@@ -752,8 +752,8 @@ define <4 x i64> @smull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) {
 define <16 x i16> @amull2_i8(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-LABEL: amull2_i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smull v2.8h, v0.8b, v1.8b
-; CHECK-NEXT: smull2 v1.8h, v0.16b, v1.16b
+; CHECK-NEXT: umull v2.8h, v0.8b, v1.8b
+; CHECK-NEXT: umull2 v1.8h, v0.16b, v1.16b
 ; CHECK-NEXT: bic v2.8h, #255, lsl #8
 ; CHECK-NEXT: bic v1.8h, #255, lsl #8
 ; CHECK-NEXT: mov v0.16b, v2.16b
@@ -768,8 +768,8 @@ define <16 x i16> @amull2_i8(<16 x i8> %arg1, <16 x i8> %arg2) {
 define <8 x i32> @amull2_i16(<8 x i16> %arg1, <8 x i16> %arg2) {
 ; CHECK-LABEL: amull2_i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smull v2.4s, v0.4h, v1.4h
-; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.8h
+; CHECK-NEXT: umull v2.4s, v0.4h, v1.4h
+; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.8h
 ; CHECK-NEXT: movi v3.2d, #0x00ffff0000ffff
 ; CHECK-NEXT: and v1.16b, v0.16b, v3.16b
 ; CHECK-NEXT: and v0.16b, v2.16b, v3.16b
@@ -784,8 +784,8 @@ define <8 x i32> @amull2_i16(<8 x i16> %arg1, <8 x i16> %arg2) {
 define <4 x i64> @amull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) {
 ; CHECK-LABEL: amull2_i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smull v2.2d, v0.2s, v1.2s
-; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.4s
+; CHECK-NEXT: umull v2.2d, v0.2s, v1.2s
+; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.4s
 ; CHECK-NEXT: movi v3.2d, #0x000000ffffffff
 ; CHECK-NEXT: and v1.16b, v0.16b, v3.16b
 ; CHECK-NEXT: and v0.16b, v2.16b, v3.16b

@@ -5,7 +5,7 @@ define <4 x i16> @mlai16_trunc(<4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2
 ; CHECK-LABEL: mlai16_trunc:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: smull v0.4s, v1.4h, v0.4h
-; CHECK-NEXT: uaddw v0.4s, v0.4s, v2.4h
+; CHECK-NEXT: saddw v0.4s, v0.4s, v2.4h
 ; CHECK-NEXT: xtn v0.4h, v0.4s
 ; CHECK-NEXT: ret
 entry:
@@ -21,7 +21,7 @@ entry:
 define <4 x i32> @mlai16_and(<4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2) {
 ; CHECK-LABEL: mlai16_and:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smull v0.4s, v1.4h, v0.4h
+; CHECK-NEXT: umull v0.4s, v1.4h, v0.4h
 ; CHECK-NEXT: uaddw v0.4s, v0.4s, v2.4h
 ; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@@ -43,7 +43,7 @@ define void @mlai16_loadstore(i16* %a, i16* %b, i16* %c) {
 ; CHECK-NEXT: ldr d1, [x1, #16]
 ; CHECK-NEXT: ldr d2, [x2, #16]
 ; CHECK-NEXT: smull v0.4s, v1.4h, v0.4h
-; CHECK-NEXT: uaddw v0.4s, v0.4s, v2.4h
+; CHECK-NEXT: saddw v0.4s, v0.4s, v2.4h
 ; CHECK-NEXT: xtn v0.4h, v0.4s
 ; CHECK-NEXT: str d0, [x0, #16]
 ; CHECK-NEXT: ret
@@ -89,8 +89,8 @@ entry:
 define <4 x i32> @addmuli16_and(<4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2) {
 ; CHECK-LABEL: addmuli16_and:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h
-; CHECK-NEXT: smlal v1.4s, v0.4h, v2.4h
+; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
+; CHECK-NEXT: umlal v1.4s, v0.4h, v2.4h
 ; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff
 ; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT: ret
@@ -141,7 +141,7 @@ define <2 x i32> @mlai32_trunc(<2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> %vec2
 ; CHECK-LABEL: mlai32_trunc:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: smull v0.2d, v1.2s, v0.2s
-; CHECK-NEXT: uaddw v0.2d, v0.2d, v2.2s
+; CHECK-NEXT: saddw v0.2d, v0.2d, v2.2s
 ; CHECK-NEXT: xtn v0.2s, v0.2d
 ; CHECK-NEXT: ret
 entry:
@@ -157,7 +157,7 @@ entry:
 define <2 x i64> @mlai32_and(<2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> %vec2) {
 ; CHECK-LABEL: mlai32_and:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smull v0.2d, v1.2s, v0.2s
+; CHECK-NEXT: umull v0.2d, v1.2s, v0.2s
 ; CHECK-NEXT: uaddw v0.2d, v0.2d, v2.2s
 ; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@@ -179,7 +179,7 @@ define void @mlai32_loadstore(i32* %a, i32* %b, i32* %c) {
 ; CHECK-NEXT: ldr d1, [x1, #32]
 ; CHECK-NEXT: ldr d2, [x2, #32]
 ; CHECK-NEXT: smull v0.2d, v1.2s, v0.2s
-; CHECK-NEXT: uaddw v0.2d, v0.2d, v2.2s
+; CHECK-NEXT: saddw v0.2d, v0.2d, v2.2s
 ; CHECK-NEXT: xtn v0.2s, v0.2d
 ; CHECK-NEXT: str d0, [x0, #32]
 ; CHECK-NEXT: ret
@@ -225,8 +225,8 @@ entry:
 define <2 x i64> @addmuli32_and(<2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> %vec2) {
 ; CHECK-LABEL: addmuli32_and:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: smull v1.2d, v1.2s, v2.2s
-; CHECK-NEXT: smlal v1.2d, v0.2s, v2.2s
+; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
+; CHECK-NEXT: umlal v1.2d, v0.2s, v2.2s
 ; CHECK-NEXT: movi v0.2d, #0x000000ffffffff
 ; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT: ret
@@ -359,7 +359,7 @@ define void @func2(i16* %a, i16* %b, i16* %c) {
 ; CHECK-NEXT: str d1, [x1, #16]
 ; CHECK-NEXT: ldr d1, [x2, #16]
 ; CHECK-NEXT: smlal v0.4s, v1.4h, v2.4h
-; CHECK-NEXT: uaddw v0.4s, v0.4s, v2.4h
+; CHECK-NEXT: saddw v0.4s, v0.4s, v2.4h
 ; CHECK-NEXT: xtn v0.4h, v0.4s
 ; CHECK-NEXT: str d0, [x0, #16]
 ; CHECK-NEXT: ret

@@ -240,8 +240,8 @@ define amdgpu_kernel void @widen_v2i8_constant_load(<2 x i8> addrspace(4)* %arg)
 ; VI-NEXT: v_add_u32_sdwa v0, vcc, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
 ; VI-NEXT: s_or_b32 s0, s1, 4
 ; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT: v_mov_b32_e32 v1, s0
-; VI-NEXT: v_or_b32_sdwa v2, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT: s_and_b32 s0, s0, 0xff
+; VI-NEXT: v_or_b32_e32 v2, s0, v0
 ; VI-NEXT: v_mov_b32_e32 v0, 0
 ; VI-NEXT: v_mov_b32_e32 v1, 0
 ; VI-NEXT: flat_store_short v[0:1], v2

@@ -4,8 +4,8 @@
 define arm_aapcs_vfpcc <4 x i16> @mla_args(<4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2) {
 ; CHECK-LABEL: mla_args:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmull.u16 q8, d1, d0
-; CHECK-NEXT: vaddw.u16 q8, q8, d2
+; CHECK-NEXT: vmull.s16 q8, d1, d0
+; CHECK-NEXT: vaddw.s16 q8, q8, d2
 ; CHECK-NEXT: vmovn.i32 d0, q8
 ; CHECK-NEXT: bx lr
 entry:
@@ -24,8 +24,8 @@ define void @mla_loadstore(i16* %a, i16* %b, i16* %c) {
 ; CHECK-NEXT: vldr d16, [r0, #16]
 ; CHECK-NEXT: vldr d17, [r1, #16]
 ; CHECK-NEXT: vldr d18, [r2, #16]
-; CHECK-NEXT: vmull.u16 q8, d17, d16
-; CHECK-NEXT: vaddw.u16 q8, q8, d18
+; CHECK-NEXT: vmull.s16 q8, d17, d16
+; CHECK-NEXT: vaddw.s16 q8, q8, d18
 ; CHECK-NEXT: vmovn.i32 d16, q8
 ; CHECK-NEXT: vstr d16, [r0, #16]
 ; CHECK-NEXT: bx lr
@@ -54,8 +54,8 @@ entry:
 define arm_aapcs_vfpcc <4 x i16> @addmul_args(<4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2) {
 ; CHECK-LABEL: addmul_args:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmull.u16 q8, d1, d2
-; CHECK-NEXT: vmlal.u16 q8, d0, d2
+; CHECK-NEXT: vmull.s16 q8, d1, d2
+; CHECK-NEXT: vmlal.s16 q8, d0, d2
 ; CHECK-NEXT: vmovn.i32 d0, q8
 ; CHECK-NEXT: bx lr
 entry:
@@ -73,9 +73,9 @@ define void @addmul_loadstore(i16* %a, i16* %b, i16* %c) {
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vldr d16, [r2, #16]
 ; CHECK-NEXT: vldr d17, [r1, #16]
-; CHECK-NEXT: vmull.u16 q9, d17, d16
+; CHECK-NEXT: vmull.s16 q9, d17, d16
 ; CHECK-NEXT: vldr d17, [r0, #16]
-; CHECK-NEXT: vmlal.u16 q9, d17, d16
+; CHECK-NEXT: vmlal.s16 q9, d17, d16
 ; CHECK-NEXT: vmovn.i32 d16, q9
 ; CHECK-NEXT: vstr d16, [r0, #16]
 ; CHECK-NEXT: bx lr
@@ -108,7 +108,7 @@ define void @func1(i16* %a, i16* %b, i16* %c) {
 ; CHECK-NEXT: vldr d18, [r2, #16]
 ; CHECK-NEXT: vld1.16 {d16}, [r3:64]
 ; CHECK-NEXT: vmovl.u16 q8, d16
-; CHECK-NEXT: vaddw.u16 q10, q8, d18
+; CHECK-NEXT: vaddw.s16 q10, q8, d18
 ; CHECK-NEXT: vmovn.i32 d19, q10
 ; CHECK-NEXT: vldr d20, [r0, #16]
 ; CHECK-NEXT: vstr d19, [r0, #16]
@@ -119,7 +119,7 @@ define void @func1(i16* %a, i16* %b, i16* %c) {
 ; CHECK-NEXT: vmovn.i32 d16, q11
 ; CHECK-NEXT: vstr d16, [r1, #16]
 ; CHECK-NEXT: vldr d16, [r2, #16]
-; CHECK-NEXT: vmlal.u16 q11, d16, d20
+; CHECK-NEXT: vmlal.s16 q11, d16, d20
 ; CHECK-NEXT: vmovn.i32 d16, q11
 ; CHECK-NEXT: vstr d16, [r0, #16]
 ; CHECK-NEXT: bx lr
@@ -175,26 +175,23 @@ entry:
 define void @func2(i16* %a, i16* %b, i16* %c) {
 ; CHECK-LABEL: func2:
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vldr d16, [r1, #16]
 ; CHECK-NEXT: add r3, r0, #16
 ; CHECK-NEXT: vldr d17, [r2, #16]
 ; CHECK-NEXT: vaddl.u16 q9, d17, d16
 ; CHECK-NEXT: vmovn.i32 d18, q9
 ; CHECK-NEXT: vld1.16 {d19}, [r3:64]
 ; CHECK-NEXT: vstr d18, [r0, #16]
 ; CHECK-NEXT: add r3, r1, #16
 ; CHECK-NEXT: vldr d18, [r2, #16]
 ; CHECK-NEXT: vmull.s16 q10, d17, d18
 ; CHECK-NEXT: vmovl.s16 q11, d18
 ; CHECK-NEXT: vld1.16 {d16}, [r3:64]
 ; CHECK-NEXT: vmovl.u16 q8, d16
 ; CHECK-NEXT: vaddw.s16 q10, q8, d18
 ; CHECK-NEXT: vmovn.i32 d19, q10
 ; CHECK-NEXT: vldr d20, [r0, #16]
 ; CHECK-NEXT: vstr d19, [r0, #16]
 ; CHECK-NEXT: vldr d19, [r2, #16]
 ; CHECK-NEXT: vmull.s16 q11, d18, d19
 ; CHECK-NEXT: vmovl.s16 q9, d19
 ; CHECK-NEXT: vmla.i32 q10, q8, q11
 ; CHECK-NEXT: vmovn.i32 d16, q10
 ; CHECK-NEXT: vmla.i32 q11, q8, q9
 ; CHECK-NEXT: vmovn.i32 d16, q11
 ; CHECK-NEXT: vstr d16, [r1, #16]
 ; CHECK-NEXT: add r1, r2, #16
 ; CHECK-NEXT: vld1.16 {d16}, [r1:64]
 ; CHECK-NEXT: vmovl.u16 q8, d16
 ; CHECK-NEXT: vmla.i32 q10, q8, q9
 ; CHECK-NEXT: vadd.i32 q8, q10, q9
 ; CHECK-NEXT: vldr d16, [r2, #16]
 ; CHECK-NEXT: vmlal.s16 q11, d16, d20
 ; CHECK-NEXT: vaddw.s16 q8, q11, d20
 ; CHECK-NEXT: vmovn.i32 d16, q8
 ; CHECK-NEXT: vstr d16, [r0, #16]
 ; CHECK-NEXT: bx lr

@@ -1503,6 +1503,7 @@ define arm_aapcs_vfpcc void @ssatmul_s4t_q15(i16* nocapture readonly %pSrcA, i16
 ; CHECK-NEXT: vldrht.s32 q3, [r1], #8
 ; CHECK-NEXT: vmul.i32 q2, q3, q2
 ; CHECK-NEXT: vqshrnb.s32 q2, q2, #15
+; CHECK-NEXT: vmovlb.s16 q2, q2
 ; CHECK-NEXT: vpst
 ; CHECK-NEXT: vstrht.32 q2, [r2], #8
 ; CHECK-NEXT: le lr, .LBB8_2
@@ -2716,6 +2717,7 @@ define arm_aapcs_vfpcc void @ssatmul_8t_q7(i8* nocapture readonly %pSrcA, i8* no
 ; CHECK-NEXT: vldrbt.s16 q6, [r1], #8
 ; CHECK-NEXT: vmul.i16 q5, q6, q5
 ; CHECK-NEXT: vqshrnb.s16 q5, q5, #7
+; CHECK-NEXT: vmovlb.s8 q5, q5
 ; CHECK-NEXT: vpst
 ; CHECK-NEXT: vstrbt.16 q5, [r2], #8
 ; CHECK-NEXT: le lr, .LBB17_2

@@ -153,7 +153,7 @@ define arm_aapcs_vfpcc <8 x i16> @vmulhs_v8i16(<8 x i16> %s0, <8 x i16> %s1) {
 ; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
 ; CHECK-NEXT: vmov.u16 r1, q1[4]
 ; CHECK-NEXT: vmullb.s16 q2, q3, q2
-; CHECK-NEXT: vshr.u32 q3, q2, #16
+; CHECK-NEXT: vshr.s32 q3, q2, #16
 ; CHECK-NEXT: vmov r0, s12
 ; CHECK-NEXT: vmov.16 q2[0], r0
 ; CHECK-NEXT: vmov r0, s13
@@ -174,7 +174,7 @@ define arm_aapcs_vfpcc <8 x i16> @vmulhs_v8i16(<8 x i16> %s0, <8 x i16> %s1) {
 ; CHECK-NEXT: vmov.u16 r1, q0[5]
 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
 ; CHECK-NEXT: vmullb.s16 q0, q1, q3
-; CHECK-NEXT: vshr.u32 q0, q0, #16
+; CHECK-NEXT: vshr.s32 q0, q0, #16
 ; CHECK-NEXT: vmov r0, s0
 ; CHECK-NEXT: vmov.16 q2[4], r0
 ; CHECK-NEXT: vmov r0, s1
@@ -318,7 +318,7 @@ define arm_aapcs_vfpcc <16 x i8> @vmulhs_v16i8(<16 x i8> %s0, <16 x i8> %s1) {
 ; CHECK-NEXT: vmov.u8 r0, q0[7]
 ; CHECK-NEXT: vmov.16 q3[7], r0
 ; CHECK-NEXT: vmullb.s8 q2, q3, q2
-; CHECK-NEXT: vshr.u16 q3, q2, #8
+; CHECK-NEXT: vshr.s16 q3, q2, #8
 ; CHECK-NEXT: vmov.u16 r0, q3[0]
 ; CHECK-NEXT: vmov.8 q2[0], r0
 ; CHECK-NEXT: vmov.u16 r0, q3[1]
@@ -368,7 +368,7 @@ define arm_aapcs_vfpcc <16 x i8> @vmulhs_v16i8(<16 x i8> %s0, <16 x i8> %s1) {
 ; CHECK-NEXT: vmov.u8 r0, q0[15]
 ; CHECK-NEXT: vmov.16 q1[7], r0
 ; CHECK-NEXT: vmullb.s8 q0, q1, q3
-; CHECK-NEXT: vshr.u16 q0, q0, #8
+; CHECK-NEXT: vshr.s16 q0, q0, #8
 ; CHECK-NEXT: vmov.u16 r0, q0[0]
 ; CHECK-NEXT: vmov.8 q2[8], r0
 ; CHECK-NEXT: vmov.u16 r0, q0[1]

@@ -252,7 +252,7 @@ define <4 x i32> @combine_vec_ashr_trunc_ashr(<4 x i64> %x) {
 ;
 ; AVX2-FAST-LABEL: combine_vec_ashr_trunc_ashr:
 ; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7]
+; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <1,3,5,7,u,u,u,u>
 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
 ; AVX2-FAST-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2-FAST-NEXT: vzeroupper

@@ -251,8 +251,9 @@ define float @signbits_ashr_insert_ashr_extract_sitofp(i64 %a0, i64 %a1) nounwin
 define <4 x double> @signbits_sext_shuffle_sitofp(<4 x i32> %a0, <4 x i64> %a1) nounwind {
 ; X86-LABEL: signbits_sext_shuffle_sitofp:
 ; X86: # %bb.0:
-; X86-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
-; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; X86-NEXT: vpmovsxdq %xmm0, %xmm1
+; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; X86-NEXT: vpmovsxdq %xmm0, %xmm0
 ; X86-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; X86-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
 ; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -262,8 +263,9 @@ define <4 x double> @signbits_sext_shuffle_sitofp(<4 x i32> %a0, <4 x i64> %a1)
 ;
 ; X64-AVX1-LABEL: signbits_sext_shuffle_sitofp:
 ; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; X64-AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
+; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; X64-AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
 ; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -273,7 +275,7 @@ define <4 x double> @signbits_sext_shuffle_sitofp(<4 x i32> %a0, <4 x i64> %a1)
 ;
 ; X64-AVX2-LABEL: signbits_sext_shuffle_sitofp:
 ; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; X64-AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
 ; X64-AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X64-AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]

@@ -920,9 +920,10 @@ define <8 x i32> @trunc_v8i64_v8i32_sign(<8 x i64>* %x) nounwind "min-legal-vect
 define <16 x i16> @trunc_v16i32_v16i16_sign(<16 x i32>* %x) nounwind "min-legal-vector-width"="256" {
 ; CHECK-LABEL: trunc_v16i32_v16i16_sign:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa (%rdi), %ymm1
-; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31]
-; CHECK-NEXT: vpermi2w 32(%rdi), %ymm1, %ymm0
+; CHECK-NEXT: vpsrad $16, 32(%rdi), %ymm0
+; CHECK-NEXT: vpsrad $16, (%rdi), %ymm1
+; CHECK-NEXT: vpackssdw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; CHECK-NEXT: retq
   %a = load <16 x i32>, <16 x i32>* %x
   %b = ashr <16 x i32> %a, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
@@ -931,20 +932,13 @@ define <16 x i16> @trunc_v16i32_v16i16_sign(<16 x i32>* %x) nounwind "min-legal-
 }

 define <32 x i8> @trunc_v32i16_v32i8_sign(<32 x i16>* %x) nounwind "min-legal-vector-width"="256" {
-; CHECK-AVX512-LABEL: trunc_v32i16_v32i8_sign:
-; CHECK-AVX512: # %bb.0:
-; CHECK-AVX512-NEXT: vpsrlw $8, 32(%rdi), %ymm0
-; CHECK-AVX512-NEXT: vpsrlw $8, (%rdi), %ymm1
-; CHECK-AVX512-NEXT: vpackuswb %ymm0, %ymm1, %ymm0
-; CHECK-AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; CHECK-AVX512-NEXT: retq
-;
-; CHECK-VBMI-LABEL: trunc_v32i16_v32i8_sign:
-; CHECK-VBMI: # %bb.0:
-; CHECK-VBMI-NEXT: vmovdqa (%rdi), %ymm1
-; CHECK-VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63]
-; CHECK-VBMI-NEXT: vpermi2b 32(%rdi), %ymm1, %ymm0
-; CHECK-VBMI-NEXT: retq
+; CHECK-LABEL: trunc_v32i16_v32i8_sign:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpsraw $8, 32(%rdi), %ymm0
+; CHECK-NEXT: vpsraw $8, (%rdi), %ymm1
+; CHECK-NEXT: vpacksswb %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; CHECK-NEXT: retq
   %a = load <32 x i16>, <32 x i16>* %x
   %b = ashr <32 x i16> %a, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
   %c = trunc <32 x i16> %b to <32 x i8>

@@ -73,7 +73,7 @@ define <8 x i32> @trunc8i64_8i32_ashr(<8 x i64> %a) {
 ;
 ; AVX2-FAST-LABEL: trunc8i64_8i32_ashr:
 ; AVX2-FAST: # %bb.0: # %entry
-; AVX2-FAST-NEXT: vmovaps {{.*#+}} xmm2 = [1,3,5,7]
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,5,7,5,7,6,7]
 ; AVX2-FAST-NEXT: vpermps %ymm0, %ymm2, %ymm0
 ; AVX2-FAST-NEXT: vpermps %ymm1, %ymm2, %ymm1
 ; AVX2-FAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -81,7 +81,7 @@ define <8 x i32> @trunc8i64_8i32_ashr(<8 x i64> %a) {
 ;
 ; AVX512-LABEL: trunc8i64_8i32_ashr:
 ; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpsrlq $32, %zmm0, %zmm0
+; AVX512-NEXT: vpsraq $32, %zmm0, %zmm0
 ; AVX512-NEXT: vpmovqd %zmm0, %ymm0
 ; AVX512-NEXT: retq
 entry:
@@ -383,47 +383,33 @@ entry:
 }

 define <8 x i16> @trunc8i32_8i16_ashr(<8 x i32> %a) {
-; SSE2-LABEL: trunc8i32_8i16_ashr:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: psrad $16, %xmm1
-; SSE2-NEXT: psrad $16, %xmm0
-; SSE2-NEXT: packssdw %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: trunc8i32_8i16_ashr:
-; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: psrad $16, %xmm1
-; SSSE3-NEXT: psrad $16, %xmm0
-; SSSE3-NEXT: packssdw %xmm1, %xmm0
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: trunc8i32_8i16_ashr:
-; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: psrld $16, %xmm1
-; SSE41-NEXT: psrld $16, %xmm0
-; SSE41-NEXT: packusdw %xmm1, %xmm0
-; SSE41-NEXT: retq
+; SSE-LABEL: trunc8i32_8i16_ashr:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: psrad $16, %xmm1
+; SSE-NEXT: psrad $16, %xmm0
+; SSE-NEXT: packssdw %xmm1, %xmm0
+; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: trunc8i32_8i16_ashr:
 ; AVX1: # %bb.0: # %entry
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpsrld $16, %xmm1, %xmm1
-; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
-; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrad $16, %xmm1, %xmm1
+; AVX1-NEXT: vpsrad $16, %xmm0, %xmm0
+; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: trunc8i32_8i16_ashr:
 ; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
+; AVX2-NEXT: vpsrad $16, %ymm0, %ymm0
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
 ;
 ; AVX512F-LABEL: trunc8i32_8i16_ashr:
 ; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vpsrld $16, %ymm0, %ymm0
+; AVX512F-NEXT: vpsrad $16, %ymm0, %ymm0
 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX512F-NEXT: vzeroupper
@@ -431,14 +417,14 @@ define <8 x i16> @trunc8i32_8i16_ashr(<8 x i32> %a) {
 ;
 ; AVX512VL-LABEL: trunc8i32_8i16_ashr:
 ; AVX512VL: # %bb.0: # %entry
-; AVX512VL-NEXT: vpsrld $16, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsrad $16, %ymm0, %ymm0
 ; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512BW-LABEL: trunc8i32_8i16_ashr:
 ; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: vpsrld $16, %ymm0, %ymm0
+; AVX512BW-NEXT: vpsrad $16, %ymm0, %ymm0
 ; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX512BW-NEXT: vzeroupper
@@ -446,7 +432,7 @@ define <8 x i16> @trunc8i32_8i16_ashr(<8 x i32> %a) {
 ;
 ; AVX512BWVL-LABEL: trunc8i32_8i16_ashr:
 ; AVX512BWVL: # %bb.0: # %entry
-; AVX512BWVL-NEXT: vpsrld $16, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpsrad $16, %ymm0, %ymm0
 ; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
 ; AVX512BWVL-NEXT: vzeroupper
 ; AVX512BWVL-NEXT: retq
@@ -698,52 +684,28 @@ entry:
 }

 define void @trunc16i32_16i16_ashr(<16 x i32> %a) {
-; SSE2-LABEL: trunc16i32_16i16_ashr:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: psrad $16, %xmm1
-; SSE2-NEXT: psrad $16, %xmm0
-; SSE2-NEXT: packssdw %xmm1, %xmm0
-; SSE2-NEXT: psrad $16, %xmm3
-; SSE2-NEXT: psrad $16, %xmm2
-; SSE2-NEXT: packssdw %xmm3, %xmm2
-; SSE2-NEXT: movdqu %xmm2, (%rax)
-; SSE2-NEXT: movdqu %xmm0, (%rax)
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: trunc16i32_16i16_ashr:
-; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: psrad $16, %xmm1
-; SSSE3-NEXT: psrad $16, %xmm0
-; SSSE3-NEXT: packssdw %xmm1, %xmm0
-; SSSE3-NEXT: psrad $16, %xmm3
-; SSSE3-NEXT: psrad $16, %xmm2
-; SSSE3-NEXT: packssdw %xmm3, %xmm2
-; SSSE3-NEXT: movdqu %xmm2, (%rax)
-; SSSE3-NEXT: movdqu %xmm0, (%rax)
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: trunc16i32_16i16_ashr:
-; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: psrld $16, %xmm3
-; SSE41-NEXT: psrld $16, %xmm2
-; SSE41-NEXT: packusdw %xmm3, %xmm2
-; SSE41-NEXT: psrld $16, %xmm1
-; SSE41-NEXT: psrld $16, %xmm0
-; SSE41-NEXT: packusdw %xmm1, %xmm0
-; SSE41-NEXT: movdqu %xmm2, (%rax)
-; SSE41-NEXT: movdqu %xmm0, (%rax)
-; SSE41-NEXT: retq
+; SSE-LABEL: trunc16i32_16i16_ashr:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: psrad $16, %xmm3
+; SSE-NEXT: psrad $16, %xmm2
+; SSE-NEXT: packssdw %xmm3, %xmm2
+; SSE-NEXT: psrad $16, %xmm1
+; SSE-NEXT: psrad $16, %xmm0
+; SSE-NEXT: packssdw %xmm1, %xmm0
+; SSE-NEXT: movdqu %xmm2, (%rax)
+; SSE-NEXT: movdqu %xmm0, (%rax)
+; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: trunc16i32_16i16_ashr:
 ; AVX1: # %bb.0: # %entry
 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2
-; AVX1-NEXT: vpsrld $16, %xmm1, %xmm1
-; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsrad $16, %xmm2, %xmm2
+; AVX1-NEXT: vpsrad $16, %xmm1, %xmm1
+; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2
-; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
-; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrad $16, %xmm2, %xmm2
+; AVX1-NEXT: vpsrad $16, %xmm0, %xmm0
+; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vmovdqu %xmm1, (%rax)
 ; AVX1-NEXT: vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT: vzeroupper
@@ -751,9 +713,9 @@ define void @trunc16i32_16i16_ashr(<16 x i32> %a) {
 ;
 ; AVX2-LABEL: trunc16i32_16i16_ashr:
 ; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1
-; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
-; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsrad $16, %ymm1, %ymm1
+; AVX2-NEXT: vpsrad $16, %ymm0, %ymm0
+; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; AVX2-NEXT: vmovdqu %ymm0, (%rax)
 ; AVX2-NEXT: vzeroupper
@@ -928,64 +890,40 @@ entry:
 }

 define void @trunc16i32_16i8_ashr(<16 x i32> %a) {
-; SSE2-LABEL: trunc16i32_16i8_ashr:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: psrld $24, %xmm1
-; SSE2-NEXT: psrld $24, %xmm0
-; SSE2-NEXT: packuswb %xmm1, %xmm0
-; SSE2-NEXT: psrld $24, %xmm3
-; SSE2-NEXT: psrld $24, %xmm2
-; SSE2-NEXT: packuswb %xmm3, %xmm2
-; SSE2-NEXT: packuswb %xmm2, %xmm0
-; SSE2-NEXT: movdqu %xmm0, (%rax)
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: trunc16i32_16i8_ashr:
-; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: psrld $24, %xmm1
-; SSSE3-NEXT: psrld $24, %xmm0
-; SSSE3-NEXT: packuswb %xmm1, %xmm0
-; SSSE3-NEXT: psrld $24, %xmm3
-; SSSE3-NEXT: psrld $24, %xmm2
-; SSSE3-NEXT: packuswb %xmm3, %xmm2
-; SSSE3-NEXT: packuswb %xmm2, %xmm0
-; SSSE3-NEXT: movdqu %xmm0, (%rax)
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: trunc16i32_16i8_ashr:
-; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: psrld $24, %xmm1
-; SSE41-NEXT: psrld $24, %xmm0
-; SSE41-NEXT: packusdw %xmm1, %xmm0
-; SSE41-NEXT: psrld $24, %xmm3
-; SSE41-NEXT: psrld $24, %xmm2
-; SSE41-NEXT: packusdw %xmm3, %xmm2
-; SSE41-NEXT: packuswb %xmm2, %xmm0
-; SSE41-NEXT: movdqu %xmm0, (%rax)
-; SSE41-NEXT: retq
+; SSE-LABEL: trunc16i32_16i8_ashr:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: psrad $24, %xmm1
+; SSE-NEXT: psrad $24, %xmm0
+; SSE-NEXT: packssdw %xmm1, %xmm0
+; SSE-NEXT: psrad $24, %xmm3
+; SSE-NEXT: psrad $24, %xmm2
+; SSE-NEXT: packssdw %xmm3, %xmm2
+; SSE-NEXT: packsswb %xmm2, %xmm0
+; SSE-NEXT: movdqu %xmm0, (%rax)
+; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: trunc16i32_16i8_ashr:
 ; AVX1: # %bb.0: # %entry
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpsrld $24, %xmm2, %xmm2
-; AVX1-NEXT: vpsrld $24, %xmm0, %xmm0
-; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrad $24, %xmm2, %xmm2
+; AVX1-NEXT: vpsrad $24, %xmm0, %xmm0
+; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpsrld $24, %xmm2, %xmm2
-; AVX1-NEXT: vpsrld $24, %xmm1, %xmm1
-; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrad $24, %xmm2, %xmm2
+; AVX1-NEXT: vpsrad $24, %xmm1, %xmm1
+; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: trunc16i32_16i8_ashr:
 ; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vpsrld $24, %ymm1, %ymm1
-; AVX2-NEXT: vpsrld $24, %ymm0, %ymm0
-; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsrad $24, %ymm1, %ymm1
+; AVX2-NEXT: vpsrad $24, %ymm0, %ymm0
+; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
 ; AVX2-NEXT: vmovdqu %xmm0, (%rax)
 ; AVX2-NEXT: vzeroupper
@@ -1146,27 +1084,27 @@ entry:
 define void @trunc16i16_16i8_ashr(<16 x i16> %a) {
 ; SSE-LABEL: trunc16i16_16i8_ashr:
 ; SSE: # %bb.0: # %entry
-; SSE-NEXT: psrlw $8, %xmm1
-; SSE-NEXT: psrlw $8, %xmm0
-; SSE-NEXT: packuswb %xmm1, %xmm0
+; SSE-NEXT: psraw $8, %xmm1
+; SSE-NEXT: psraw $8, %xmm0
+; SSE-NEXT: packsswb %xmm1, %xmm0
 ; SSE-NEXT: movdqu %xmm0, (%rax)
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: trunc16i16_16i8_ashr:
 ; AVX1: # %bb.0: # %entry
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $8, %xmm1, %xmm1
+; AVX1-NEXT: vpsraw $8, %xmm0, %xmm0
+; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: trunc16i16_16i8_ashr:
 ; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX2-NEXT: vpsraw $8, %ymm0, %ymm0
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vmovdqu %xmm0, (%rax)
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -1189,7 +1127,7 @@ define void @trunc16i16_16i8_ashr(<16 x i16> %a) {
 ;
 ; AVX512BW-LABEL: trunc16i16_16i8_ashr:
 ; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX512BW-NEXT: vpsraw $8, %ymm0, %ymm0
 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
 ; AVX512BW-NEXT: vmovdqu %xmm0, (%rax)
 ; AVX512BW-NEXT: vzeroupper