forked from OSchip/llvm-project
[DAGCombine] Basic combines for AVG nodes.
This adds very basic combines for AVG nodes, mostly for constant folding and handling degenerate (zero) cases. The code performs mostly the same transforms as visitMULHS, adjusted for AVG nodes. Constant folding extends to a higher bitwidth and drops the lowest bit. For undef operands, `avg undef, x` is folded to x. There is also a transform that turns `avgfloor x, 0` into `shr x, 1`. Differential Revision: https://reviews.llvm.org/D119559
This commit is contained in:
parent
a87d3ba61c
commit
03380c70ed
|
@ -426,6 +426,7 @@ namespace {
|
||||||
SDValue visitREM(SDNode *N);
|
SDValue visitREM(SDNode *N);
|
||||||
SDValue visitMULHU(SDNode *N);
|
SDValue visitMULHU(SDNode *N);
|
||||||
SDValue visitMULHS(SDNode *N);
|
SDValue visitMULHS(SDNode *N);
|
||||||
|
SDValue visitAVG(SDNode *N);
|
||||||
SDValue visitSMUL_LOHI(SDNode *N);
|
SDValue visitSMUL_LOHI(SDNode *N);
|
||||||
SDValue visitUMUL_LOHI(SDNode *N);
|
SDValue visitUMUL_LOHI(SDNode *N);
|
||||||
SDValue visitMULO(SDNode *N);
|
SDValue visitMULO(SDNode *N);
|
||||||
|
@ -1635,6 +1636,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
|
||||||
case ISD::UREM: return visitREM(N);
|
case ISD::UREM: return visitREM(N);
|
||||||
case ISD::MULHU: return visitMULHU(N);
|
case ISD::MULHU: return visitMULHU(N);
|
||||||
case ISD::MULHS: return visitMULHS(N);
|
case ISD::MULHS: return visitMULHS(N);
|
||||||
|
case ISD::AVGFLOORS:
|
||||||
|
case ISD::AVGFLOORU:
|
||||||
|
case ISD::AVGCEILS:
|
||||||
|
case ISD::AVGCEILU: return visitAVG(N);
|
||||||
case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
|
case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
|
||||||
case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
|
case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
|
||||||
case ISD::SMULO:
|
case ISD::SMULO:
|
||||||
|
@ -4654,6 +4659,46 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Perform combines common to all four averaging nodes
/// (AVGFLOORS, AVGFLOORU, AVGCEILS, AVGCEILU): constant folding,
/// constant canonicalization, and degenerate-operand simplification.
SDValue DAGCombiner::visitAVG(SDNode *N) {
  unsigned Opc = N->getOpcode();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (avg c1, c2) -> constant
  if (SDValue Folded = DAG.FoldConstantArithmetic(Opc, DL, VT, {LHS, RHS}))
    return Folded;

  // AVG is commutative, so canonicalize a lone constant onto the RHS.
  if (DAG.isConstantIntBuildVectorOrConstantInt(LHS) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(RHS))
    return DAG.getNode(Opc, DL, N->getVTList(), RHS, LHS);

  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

    // fold (avgfloor x, 0) -> x >> 1, using an arithmetic shift for the
    // signed form and a logical shift for the unsigned form. The ceiling
    // variants round up and so cannot use this fold.
    if (ISD::isConstantSplatVectorAllZeros(RHS.getNode())) {
      if (Opc == ISD::AVGFLOORS)
        return DAG.getNode(ISD::SRA, DL, VT, LHS,
                           DAG.getConstant(1, DL, VT));
      if (Opc == ISD::AVGFLOORU)
        return DAG.getNode(ISD::SRL, DL, VT, LHS,
                           DAG.getConstant(1, DL, VT));
    }
  }

  // fold (avg x, undef) -> x
  if (LHS.isUndef())
    return RHS;
  if (RHS.isUndef())
    return LHS;

  // TODO If we use avg for scalars anywhere, we can add (avgfl x, 0) -> x >> 1

  return SDValue();
}
|
||||||
|
|
||||||
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
|
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
|
||||||
/// give the opcodes for the two computations that are being performed. Return
|
/// give the opcodes for the two computations that are being performed. Return
|
||||||
/// true if a simplification was made.
|
/// true if a simplification was made.
|
||||||
|
|
|
@ -5274,6 +5274,30 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
|
||||||
APInt C2Ext = C2.zext(FullWidth);
|
APInt C2Ext = C2.zext(FullWidth);
|
||||||
return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth());
|
return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth());
|
||||||
}
|
}
|
||||||
|
case ISD::AVGFLOORS: {
|
||||||
|
unsigned FullWidth = C1.getBitWidth() + 1;
|
||||||
|
APInt C1Ext = C1.sext(FullWidth);
|
||||||
|
APInt C2Ext = C2.sext(FullWidth);
|
||||||
|
return (C1Ext + C2Ext).extractBits(C1.getBitWidth(), 1);
|
||||||
|
}
|
||||||
|
case ISD::AVGFLOORU: {
|
||||||
|
unsigned FullWidth = C1.getBitWidth() + 1;
|
||||||
|
APInt C1Ext = C1.zext(FullWidth);
|
||||||
|
APInt C2Ext = C2.zext(FullWidth);
|
||||||
|
return (C1Ext + C2Ext).extractBits(C1.getBitWidth(), 1);
|
||||||
|
}
|
||||||
|
case ISD::AVGCEILS: {
|
||||||
|
unsigned FullWidth = C1.getBitWidth() + 1;
|
||||||
|
APInt C1Ext = C1.sext(FullWidth);
|
||||||
|
APInt C2Ext = C2.sext(FullWidth);
|
||||||
|
return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1);
|
||||||
|
}
|
||||||
|
case ISD::AVGCEILU: {
|
||||||
|
unsigned FullWidth = C1.getBitWidth() + 1;
|
||||||
|
APInt C1Ext = C1.zext(FullWidth);
|
||||||
|
APInt C2Ext = C2.zext(FullWidth);
|
||||||
|
return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return llvm::None;
|
return llvm::None;
|
||||||
}
|
}
|
||||||
|
|
|
@ -135,8 +135,7 @@ define <8 x i16> @haddu_i_const_lhs(<8 x i16> %src1) {
|
||||||
define <8 x i16> @haddu_i_const_zero(<8 x i16> %src1) {
|
define <8 x i16> @haddu_i_const_zero(<8 x i16> %src1) {
|
||||||
; CHECK-LABEL: haddu_i_const_zero:
|
; CHECK-LABEL: haddu_i_const_zero:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: movi v1.2d, #0000000000000000
|
; CHECK-NEXT: ushr v0.8h, v0.8h, #1
|
||||||
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
|
%result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
|
||||||
ret <8 x i16> %result
|
ret <8 x i16> %result
|
||||||
|
@ -145,9 +144,7 @@ define <8 x i16> @haddu_i_const_zero(<8 x i16> %src1) {
|
||||||
define <8 x i16> @haddu_i_const_both() {
|
define <8 x i16> @haddu_i_const_both() {
|
||||||
; CHECK-LABEL: haddu_i_const_both:
|
; CHECK-LABEL: haddu_i_const_both:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: movi v0.8h, #1
|
; CHECK-NEXT: movi v0.8h, #2
|
||||||
; CHECK-NEXT: movi v1.8h, #3
|
|
||||||
; CHECK-NEXT: uhadd v0.8h, v1.8h, v0.8h
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
|
%result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
|
||||||
ret <8 x i16> %result
|
ret <8 x i16> %result
|
||||||
|
@ -156,18 +153,16 @@ define <8 x i16> @haddu_i_const_both() {
|
||||||
define <8 x i16> @haddu_i_const_bothhigh() {
|
define <8 x i16> @haddu_i_const_bothhigh() {
|
||||||
; CHECK-LABEL: haddu_i_const_bothhigh:
|
; CHECK-LABEL: haddu_i_const_bothhigh:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
|
; CHECK-NEXT: mvni v0.8h, #1
|
||||||
; CHECK-NEXT: mvni v1.8h, #1
|
|
||||||
; CHECK-NEXT: uhadd v0.8h, v1.8h, v0.8h
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
|
%result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
|
||||||
ret <8 x i16> %result
|
ret <8 x i16> %result
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x i16> @haddu_i_undef(<8 x i16> %src1) {
|
define <8 x i16> @haddu_i_undef(<8 x i16> %t, <8 x i16> %src1) {
|
||||||
; CHECK-LABEL: haddu_i_undef:
|
; CHECK-LABEL: haddu_i_undef:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: uhadd v0.8h, v0.8h, v0.8h
|
; CHECK-NEXT: mov v0.16b, v1.16b
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
|
%result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
|
||||||
ret <8 x i16> %result
|
ret <8 x i16> %result
|
||||||
|
@ -312,8 +307,7 @@ define <8 x i16> @hadds_i_const_lhs(<8 x i16> %src1) {
|
||||||
define <8 x i16> @hadds_i_const_zero(<8 x i16> %src1) {
|
define <8 x i16> @hadds_i_const_zero(<8 x i16> %src1) {
|
||||||
; CHECK-LABEL: hadds_i_const_zero:
|
; CHECK-LABEL: hadds_i_const_zero:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: movi v1.2d, #0000000000000000
|
; CHECK-NEXT: sshr v0.8h, v0.8h, #1
|
||||||
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
|
%result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
|
||||||
ret <8 x i16> %result
|
ret <8 x i16> %result
|
||||||
|
@ -322,9 +316,7 @@ define <8 x i16> @hadds_i_const_zero(<8 x i16> %src1) {
|
||||||
define <8 x i16> @hadds_i_const_both() {
|
define <8 x i16> @hadds_i_const_both() {
|
||||||
; CHECK-LABEL: hadds_i_const_both:
|
; CHECK-LABEL: hadds_i_const_both:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: movi v0.8h, #1
|
; CHECK-NEXT: movi v0.8h, #2
|
||||||
; CHECK-NEXT: movi v1.8h, #3
|
|
||||||
; CHECK-NEXT: shadd v0.8h, v1.8h, v0.8h
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
|
%result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
|
||||||
ret <8 x i16> %result
|
ret <8 x i16> %result
|
||||||
|
@ -334,18 +326,16 @@ define <8 x i16> @hadds_i_const_bothhigh() {
|
||||||
; CHECK-LABEL: hadds_i_const_bothhigh:
|
; CHECK-LABEL: hadds_i_const_bothhigh:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: mov w8, #32766
|
; CHECK-NEXT: mov w8, #32766
|
||||||
; CHECK-NEXT: mvni v0.8h, #128, lsl #8
|
; CHECK-NEXT: dup v0.8h, w8
|
||||||
; CHECK-NEXT: dup v1.8h, w8
|
|
||||||
; CHECK-NEXT: shadd v0.8h, v1.8h, v0.8h
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
|
%result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
|
||||||
ret <8 x i16> %result
|
ret <8 x i16> %result
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x i16> @hadds_i_undef(<8 x i16> %src1) {
|
define <8 x i16> @hadds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
|
||||||
; CHECK-LABEL: hadds_i_undef:
|
; CHECK-LABEL: hadds_i_undef:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: shadd v0.8h, v0.8h, v0.8h
|
; CHECK-NEXT: mov v0.16b, v1.16b
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
|
%result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
|
||||||
ret <8 x i16> %result
|
ret <8 x i16> %result
|
||||||
|
@ -508,9 +498,7 @@ define <8 x i16> @rhaddu_i_const_zero(<8 x i16> %src1) {
|
||||||
define <8 x i16> @rhaddu_i_const_both() {
|
define <8 x i16> @rhaddu_i_const_both() {
|
||||||
; CHECK-LABEL: rhaddu_i_const_both:
|
; CHECK-LABEL: rhaddu_i_const_both:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: movi v0.8h, #1
|
; CHECK-NEXT: movi v0.8h, #2
|
||||||
; CHECK-NEXT: movi v1.8h, #3
|
|
||||||
; CHECK-NEXT: urhadd v0.8h, v1.8h, v0.8h
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
|
%result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
|
||||||
ret <8 x i16> %result
|
ret <8 x i16> %result
|
||||||
|
@ -520,17 +508,15 @@ define <8 x i16> @rhaddu_i_const_bothhigh() {
|
||||||
; CHECK-LABEL: rhaddu_i_const_bothhigh:
|
; CHECK-LABEL: rhaddu_i_const_bothhigh:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
|
; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
|
||||||
; CHECK-NEXT: mvni v1.8h, #1
|
|
||||||
; CHECK-NEXT: urhadd v0.8h, v1.8h, v0.8h
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
|
%result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
|
||||||
ret <8 x i16> %result
|
ret <8 x i16> %result
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x i16> @rhaddu_i_undef(<8 x i16> %src1) {
|
define <8 x i16> @rhaddu_i_undef(<8 x i16> %t, <8 x i16> %src1) {
|
||||||
; CHECK-LABEL: rhaddu_i_undef:
|
; CHECK-LABEL: rhaddu_i_undef:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: urhadd v0.8h, v0.8h, v0.8h
|
; CHECK-NEXT: mov v0.16b, v1.16b
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
|
%result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
|
||||||
ret <8 x i16> %result
|
ret <8 x i16> %result
|
||||||
|
@ -693,9 +679,7 @@ define <8 x i16> @rhadds_i_const_zero(<8 x i16> %src1) {
|
||||||
define <8 x i16> @rhadds_i_const_both() {
|
define <8 x i16> @rhadds_i_const_both() {
|
||||||
; CHECK-LABEL: rhadds_i_const_both:
|
; CHECK-LABEL: rhadds_i_const_both:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: movi v0.8h, #1
|
; CHECK-NEXT: movi v0.8h, #2
|
||||||
; CHECK-NEXT: movi v1.8h, #3
|
|
||||||
; CHECK-NEXT: srhadd v0.8h, v1.8h, v0.8h
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
|
%result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
|
||||||
ret <8 x i16> %result
|
ret <8 x i16> %result
|
||||||
|
@ -704,19 +688,16 @@ define <8 x i16> @rhadds_i_const_both() {
|
||||||
define <8 x i16> @rhadds_i_const_bothhigh() {
|
define <8 x i16> @rhadds_i_const_bothhigh() {
|
||||||
; CHECK-LABEL: rhadds_i_const_bothhigh:
|
; CHECK-LABEL: rhadds_i_const_bothhigh:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: mov w8, #32766
|
|
||||||
; CHECK-NEXT: mvni v0.8h, #128, lsl #8
|
; CHECK-NEXT: mvni v0.8h, #128, lsl #8
|
||||||
; CHECK-NEXT: dup v1.8h, w8
|
|
||||||
; CHECK-NEXT: srhadd v0.8h, v1.8h, v0.8h
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
|
%result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
|
||||||
ret <8 x i16> %result
|
ret <8 x i16> %result
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x i16> @rhadds_i_undef(<8 x i16> %src1) {
|
define <8 x i16> @rhadds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
|
||||||
; CHECK-LABEL: rhadds_i_undef:
|
; CHECK-LABEL: rhadds_i_undef:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: srhadd v0.8h, v0.8h, v0.8h
|
; CHECK-NEXT: mov v0.16b, v1.16b
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
|
%result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
|
||||||
ret <8 x i16> %result
|
ret <8 x i16> %result
|
||||||
|
|
|
@ -7,10 +7,9 @@ define void @f() nounwind {
|
||||||
; CHECK-NEXT: calll L0$pb
|
; CHECK-NEXT: calll L0$pb
|
||||||
; CHECK-NEXT: L0$pb:
|
; CHECK-NEXT: L0$pb:
|
||||||
; CHECK-NEXT: popl %eax
|
; CHECK-NEXT: popl %eax
|
||||||
; CHECK-NEXT: pxor %xmm0, %xmm0
|
|
||||||
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
|
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
|
||||||
; CHECK-NEXT: psllw {{\.?LCPI[0-9]+_[0-9]+}}-L0$pb(%eax), %xmm1
|
; CHECK-NEXT: psllw {{\.?LCPI[0-9]+_[0-9]+}}-L0$pb(%eax), %xmm1
|
||||||
; CHECK-NEXT: pavgw {{\.?LCPI[0-9]+_[0-9]+}}-L0$pb(%eax), %xmm0
|
; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [21183,21183,21183,21183,21183,21183,21183,21183]
|
||||||
; CHECK-NEXT: paddsw %xmm0, %xmm0
|
; CHECK-NEXT: paddsw %xmm0, %xmm0
|
||||||
; CHECK-NEXT: paddw %xmm1, %xmm0
|
; CHECK-NEXT: paddw %xmm1, %xmm0
|
||||||
; CHECK-NEXT: .p2align 4, 0x90
|
; CHECK-NEXT: .p2align 4, 0x90
|
||||||
|
|
Loading…
Reference in New Issue