forked from OSchip/llvm-project
[AArch64] Multiply extended 32-bit ints with `[U|S]MADDL'
During instruction selection, the AArch64 backend can recognise the following pattern and generate a [U|S]MADDL instruction, i.e. a multiply of two 32-bit operands with a 64-bit result: (mul (sext i32), (sext i32)). However, when one of the operands is a constant, the sign extension gets folded into the constant in SelectionDAG::getNode(). This means that instruction selection instead sees (mul (sext i32), i64), which doesn't match the pattern. Separate sign-extension and 64-bit multiply instructions are then generated, which are slower than one 32-bit multiply. Add patterns to match this case and generate the correct instruction, for both signed and unsigned multiplies. Patch by Chris Diamand! llvm-svn: 259800
This commit is contained in:
parent
e4dff62f64
commit
33b3bd17dd
|
@ -528,6 +528,12 @@ def i64imm_32bit : ImmLeaf<i64, [{
|
|||
return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
|
||||
}]>;
|
||||
|
||||
// Matches an i64 immediate whose value, read as a signed 64-bit integer, lies
// within the range of int32_t (both bounds inclusive).
def s64imm_32bit : ImmLeaf<i64, [{
  const int64_t Value = static_cast<int64_t>(Imm);
  const int64_t Lowest = std::numeric_limits<int32_t>::min();
  const int64_t Highest = std::numeric_limits<int32_t>::max();
  return !(Value < Lowest || Value > Highest);
}]>;
|
||||
|
||||
// Transforms a matched constant node into an i32 target constant built from
// the node's zero-extended value, keeping the node's source location.
def trunc_imm : SDNodeXForm<imm, [{
  const SDLoc DL(N);
  const uint64_t Value = N->getZExtValue();
  return CurDAG->getTargetConstant(Value, DL, MVT::i32);
}]>;
|
||||
|
@ -734,6 +740,40 @@ def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
|
|||
(SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
|
||||
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
|
||||
(UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
|
||||
|
||||
// Widening multiply by a constant. The extension of a constant operand has
// already been folded into the immediate, so the multiply shows up as
// (mul (ext i32), imm64). When the immediate passes the 32-bit range
// predicate it is materialised with MOVi32imm and multiplied using
// [S|U]MADDL with XZR as the addend.

// Sign-extended 32-bit register times a signed-32-bit-range immediate.
def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))),
          (SMADDLrrr GPR32:$Rn,
                     (MOVi32imm (trunc_imm imm:$C)),
                     XZR)>;

// Zero-extended 32-bit register times an unsigned-32-bit-range immediate.
def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))),
          (UMADDLrrr GPR32:$Rn,
                     (MOVi32imm (trunc_imm imm:$C)),
                     XZR)>;

// sext_inreg form: the 32-bit source is the low half of a 64-bit register.
def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))),
          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)),
                     XZR)>;
|
||||
|
||||
// Negated widening multiply by a constant: -(ext(Rn) * C) is selected as
// [S|U]MSUBL with XZR as the accumulator operand.

// Sign-extended 32-bit register.
def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
          (SMSUBLrrr GPR32:$Rn,
                     (MOVi32imm (trunc_imm imm:$C)),
                     XZR)>;

// Zero-extended 32-bit register.
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
          (UMSUBLrrr GPR32:$Rn,
                     (MOVi32imm (trunc_imm imm:$C)),
                     XZR)>;

// sext_inreg form: the 32-bit source is the low half of a 64-bit register.
def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))),
          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)),
                     XZR)>;
|
||||
|
||||
// Widening multiply-accumulate by a constant: ext(Rn) * C + Ra is selected as
// [S|U]MADDL with Ra as the addend.

// Sign-extended 32-bit register.
def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)),
          (SMADDLrrr GPR32:$Rn,
                     (MOVi32imm (trunc_imm imm:$C)),
                     GPR64:$Ra)>;

// Zero-extended 32-bit register.
def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)),
          (UMADDLrrr GPR32:$Rn,
                     (MOVi32imm (trunc_imm imm:$C)),
                     GPR64:$Ra)>;

// sext_inreg form: the 32-bit source is the low half of a 64-bit register.
def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)),
                    GPR64:$Ra)),
          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)),
                     GPR64:$Ra)>;
|
||||
|
||||
// Widening multiply-subtract by a constant.
//
// [S|U]MSUBL computes Xd = Xa - ext(Wn) * ext(Wm), so the accumulator must be
// the minuend and the product the subtrahend. Subtraction is not commutative:
// matching (sub (mul ...), Ra) here would select an instruction that returns
// the negation of the requested value.

// Sign-extended 32-bit register.
def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;

// Zero-extended 32-bit register.
def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;

// sext_inreg form: the 32-bit source is the low half of a 64-bit register.
def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32),
                                    (s64imm_32bit:$C)))),
          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
|
||||
} // AddedComplexity = 5
|
||||
|
||||
def : MulAccumWAlias<"mul", MADDWrrr>;
|
||||
|
|
|
@ -88,3 +88,55 @@ entry:
|
|||
%tmp4 = sub i64 0, %tmp3
|
||||
ret i64 %tmp4
|
||||
}
|
||||
|
||||
; A zero-extended i32 multiplied by a constant that fits in 32 bits is
; expected to select the unsigned widening multiply.
define i64 @t9(i32 %a) nounwind {
entry:
; CHECK-LABEL: t9:
; CHECK: umull {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
  %wide = zext i32 %a to i64
  %prod = mul i64 %wide, 139968
  ret i64 %prod
}
|
||||
|
||||
; Check that a plain 64-bit multiply is used when the constant does not fit in
; a signed 32-bit immediate (2^31 + 2 is just above the int32 maximum).
define i64 @t10(i32 %a) nounwind {
entry:
; CHECK-LABEL: t10:
; CHECK: mul {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
  %wide = sext i32 %a to i64
  %prod = mul i64 %wide, 2147483650 ; = 2^31 + 2
  ret i64 %prod
}
|
||||
|
||||
; Check the sext_inreg case: the 32-bit operand is produced by truncating an
; i64 and sign-extending it back, and the product is then negated.
define i64 @t11(i64 %a) nounwind {
entry:
; CHECK-LABEL: t11:
; CHECK: smnegl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
  %low = trunc i64 %a to i32
  %wide = sext i32 %low to i64
  %prod = mul i64 %wide, -2395238
  %neg = sub i64 0, %prod
  ret i64 %neg
}
|
||||
|
||||
; sext_inreg operand multiplied by a negative constant and added to a 64-bit
; accumulator: expected to select the signed widening multiply-add.
define i64 @t12(i64 %a, i64 %b) nounwind {
entry:
; CHECK-LABEL: t12:
; CHECK: smaddl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
  %low = trunc i64 %a to i32
  %wide = sext i32 %low to i64
  %prod = mul i64 %wide, -34567890
  %sum = add i64 %b, %prod
  ret i64 %sum
}
|
||||
|
||||
; Zero-extended operand multiplied by a constant and subtracted from a 64-bit
; accumulator. UMSUBL computes Xa - Wn * Wm, so the accumulator %b must be the
; minuend; computing the product minus %b is a different value and must not be
; expected to select umsubl.
define i64 @t13(i32 %a, i64 %b) nounwind {
entry:
; CHECK-LABEL: t13:
; CHECK: umsubl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
  %tmp1 = zext i32 %a to i64
  %tmp3 = mul i64 %tmp1, 12345678
  %tmp4 = sub i64 %b, %tmp3
  ret i64 %tmp4
}
|
||||
|
|
Loading…
Reference in New Issue