forked from OSchip/llvm-project
[AArch64][SVEIntrinsicOpts] Fix: predicated SVE mul/fmul are not commutative
We cannot swap the multiplicand and the multiplier, because the SVE intrinsics are predicated: in an inactive lane the result takes the value of the multiplicand (the first data operand), so the operand order is significant. Imagine a lane in the vectors having the following values: pg = 0 (lane inactive), multiplicand = 1 (from dup), multiplier = 2. The resulting value should be 1, but if we swap the multiplicand and the multiplier it becomes 2, which is incorrect. Differential Revision: https://reviews.llvm.org/D114577
This commit is contained in:
parent
a3b099b68c
commit
08d45e6f4d
|
@ -833,17 +833,12 @@ static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
|
|||
return match(SplatValue, m_FPOne()) || match(SplatValue, m_One());
|
||||
};
|
||||
|
||||
// The OpMultiplier variable should always point to the dup (if any), so
|
||||
// swap if necessary.
|
||||
if (IsUnitDup(OpMultiplicand) || IsUnitSplat(OpMultiplicand))
|
||||
std::swap(OpMultiplier, OpMultiplicand);
|
||||
|
||||
if (IsUnitSplat(OpMultiplier)) {
|
||||
// [f]mul pg (dupx 1) %n => %n
|
||||
// [f]mul pg %n, (dupx 1) => %n
|
||||
OpMultiplicand->takeName(&II);
|
||||
return IC.replaceInstUsesWith(II, OpMultiplicand);
|
||||
} else if (IsUnitDup(OpMultiplier)) {
|
||||
// [f]mul pg (dup pg 1) %n => %n
|
||||
// [f]mul pg %n, (dup pg 1) => %n
|
||||
auto *DupInst = cast<IntrinsicInst>(OpMultiplier);
|
||||
auto *DupPg = DupInst->getOperand(1);
|
||||
// TODO: this is naive. The optimization is still valid if DupPg
|
||||
|
|
|
@ -32,7 +32,8 @@ define <vscale x 2 x double> @idempotent_fmul_f64(<vscale x 2 x i1> %pg, <vscale
|
|||
|
||||
define <vscale x 2 x double> @idempotent_fmul_different_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 {
|
||||
; CHECK-LABEL: @idempotent_fmul_different_argument_order(
|
||||
; CHECK-NEXT: ret <vscale x 2 x double> [[A:%.*]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 1.000000e+00, i32 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x double> [[A:%.*]])
|
||||
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
|
||||
;
|
||||
%1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 1.0)
|
||||
; Different argument order to the above tests.
|
||||
|
|
|
@ -32,7 +32,8 @@ define <vscale x 2 x i64> @idempotent_mul_i64(<vscale x 2 x i1> %pg, <vscale x 2
|
|||
|
||||
define <vscale x 2 x i64> @idempotent_mul_different_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
|
||||
; CHECK-LABEL: @idempotent_mul_different_argument_order(
|
||||
; CHECK-NEXT: ret <vscale x 2 x i64> [[A:%.*]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> [[A:%.*]])
|
||||
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
|
||||
;
|
||||
%1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 1)
|
||||
; Different argument order to the above tests.
|
||||
|
|
Loading…
Reference in New Issue