[Hexagon] Remove support for V4

llvm-svn: 344791
This commit is contained in:
Krzysztof Parzyszek 2018-10-19 17:31:11 +00:00
parent ce3f1915f3
commit 6bfc6577f2
54 changed files with 1822 additions and 2036 deletions

View File

@ -73,4 +73,3 @@ add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(MCTargetDesc)
add_subdirectory(TargetInfo)

View File

@ -323,31 +323,27 @@ class Proc<string Name, SchedMachineModel Model,
: ProcessorModel<Name, Model, Features>;
def : Proc<"generic", HexagonModelV60,
[ArchV4, ArchV5, ArchV55, ArchV60,
[ArchV5, ArchV55, ArchV60,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv4", HexagonModelV4,
[ArchV4,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv5", HexagonModelV4,
[ArchV4, ArchV5,
def : Proc<"hexagonv5", HexagonModelV5,
[ArchV5,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv55", HexagonModelV55,
[ArchV4, ArchV5, ArchV55,
[ArchV5, ArchV55,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv60", HexagonModelV60,
[ArchV4, ArchV5, ArchV55, ArchV60,
[ArchV5, ArchV55, ArchV60,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv62", HexagonModelV62,
[ArchV4, ArchV5, ArchV55, ArchV60, ArchV62,
[ArchV5, ArchV55, ArchV60, ArchV62,
FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv65", HexagonModelV65,
[ArchV4, ArchV5, ArchV55, ArchV60, ArchV62, ArchV65,
[ArchV5, ArchV55, ArchV60, ArchV62, ArchV65,
FeatureDuplex, FeatureMemNoShuf, FeatureMemops, FeatureNVJ,
FeatureNVS, FeaturePackets, FeatureSmallData]>;

View File

@ -555,8 +555,7 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr &I1,
if ((!IsI1LowReg && !IsI2LowReg) || !isEvenReg(FirstRegIndex))
continue;
// Check that the two instructions are combinable. V4 allows more
// instructions to be merged into a combine.
// Check that the two instructions are combinable.
// The order matters because in an A2_tfrsi we may be able to encode an int8
// as the hi reg operand but only a uint6 as the low reg operand.
if ((IsI2LowReg && !areCombinableOperations(TRI, I1, *I2, AllowC64)) ||

View File

@ -15,7 +15,7 @@
#define HEXAGON_DEP_ARCH_H
namespace llvm {
namespace Hexagon {
enum class ArchEnum { V4,V5,V55,V60,V62,V65 };
enum class ArchEnum { NoArch,Generic,V5,V55,V60,V62,V65 };
} // namespace Hexagon
} // namespace llvm
#endif // HEXAGON_DEP_ARCH_H

View File

@ -18,7 +18,4 @@ def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "Hexagon::ArchEnum::V
def HasV60 : Predicate<"HST->hasV60Ops()">, AssemblerPredicate<"ArchV60">;
def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "Hexagon::ArchEnum::V55", "Enable Hexagon V55 architecture">;
def HasV55 : Predicate<"HST->hasV55Ops()">, AssemblerPredicate<"ArchV55">;
def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "Hexagon::ArchEnum::V4", "Enable Hexagon V4 architecture">;
def HasV4 : Predicate<"HST->hasV4Ops()">, AssemblerPredicate<"ArchV4">;
def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "Hexagon::ArchEnum::V5", "Enable Hexagon V5 architecture">;
def HasV5 : Predicate<"HST->hasV5Ops()">, AssemblerPredicate<"ArchV5">;

View File

@ -991,7 +991,7 @@ def A2_roundsat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = round($Rss32):sat",
tc_c2f7d806, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
tc_c2f7d806, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000110;
let hasNewValue = 1;
@ -3314,7 +3314,7 @@ def A5_vaddhubs : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vaddhub($Rss32,$Rtt32):sat",
tc_2b6f77c6, TypeS_3op>, Enc_d2216a, Requires<[HasV5]> {
tc_2b6f77c6, TypeS_3op>, Enc_d2216a {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@ -4059,7 +4059,7 @@ def F2_conv_d2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_d2df($Rss32)",
tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000011;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@ -4069,7 +4069,7 @@ def F2_conv_d2sf : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_d2sf($Rss32)",
tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000010;
let hasNewValue = 1;
@ -4081,7 +4081,7 @@ def F2_conv_df2d : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2d($Rss32)",
tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@ -4091,7 +4091,7 @@ def F2_conv_df2d_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2d($Rss32):chop",
tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@ -4101,7 +4101,7 @@ def F2_conv_df2sf : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2sf($Rss32)",
tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@ -4113,7 +4113,7 @@ def F2_conv_df2ud : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2ud($Rss32)",
tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@ -4123,7 +4123,7 @@ def F2_conv_df2ud_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2ud($Rss32):chop",
tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@ -4133,7 +4133,7 @@ def F2_conv_df2uw : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2uw($Rss32)",
tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000011;
let hasNewValue = 1;
@ -4145,7 +4145,7 @@ def F2_conv_df2uw_chop : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2uw($Rss32):chop",
tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000101;
let hasNewValue = 1;
@ -4157,7 +4157,7 @@ def F2_conv_df2w : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2w($Rss32)",
tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@ -4169,7 +4169,7 @@ def F2_conv_df2w_chop : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2w($Rss32):chop",
tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000111;
let hasNewValue = 1;
@ -4181,7 +4181,7 @@ def F2_conv_sf2d : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2d($Rs32)",
tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@ -4191,7 +4191,7 @@ def F2_conv_sf2d_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2d($Rs32):chop",
tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@ -4201,7 +4201,7 @@ def F2_conv_sf2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2df($Rs32)",
tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@ -4211,7 +4211,7 @@ def F2_conv_sf2ud : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2ud($Rs32)",
tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000011;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@ -4221,7 +4221,7 @@ def F2_conv_sf2ud_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2ud($Rs32):chop",
tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@ -4231,7 +4231,7 @@ def F2_conv_sf2uw : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2uw($Rs32)",
tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011011;
let hasNewValue = 1;
@ -4243,7 +4243,7 @@ def F2_conv_sf2uw_chop : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2uw($Rs32):chop",
tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001011011;
let hasNewValue = 1;
@ -4255,7 +4255,7 @@ def F2_conv_sf2w : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2w($Rs32)",
tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011100;
let hasNewValue = 1;
@ -4267,7 +4267,7 @@ def F2_conv_sf2w_chop : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2w($Rs32):chop",
tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001011100;
let hasNewValue = 1;
@ -4279,7 +4279,7 @@ def F2_conv_ud2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_ud2df($Rss32)",
tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@ -4289,7 +4289,7 @@ def F2_conv_ud2sf : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_ud2sf($Rss32)",
tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000001;
let hasNewValue = 1;
@ -4301,7 +4301,7 @@ def F2_conv_uw2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_uw2df($Rs32)",
tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@ -4311,7 +4311,7 @@ def F2_conv_uw2sf : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_uw2sf($Rs32)",
tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011001;
let hasNewValue = 1;
@ -4323,7 +4323,7 @@ def F2_conv_w2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_w2df($Rs32)",
tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@ -4333,7 +4333,7 @@ def F2_conv_w2sf : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_w2sf($Rs32)",
tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011010;
let hasNewValue = 1;
@ -4345,7 +4345,7 @@ def F2_dfclass : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Pd4 = dfclass($Rss32,#$Ii)",
tc_7a830544, TypeALU64>, Enc_1f19b5, Requires<[HasV5]> {
tc_7a830544, TypeALU64>, Enc_1f19b5 {
let Inst{4-2} = 0b100;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b11011100100;
@ -4356,7 +4356,7 @@ def F2_dfcmpeq : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.eq($Rss32,$Rtt32)",
tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@ -4368,7 +4368,7 @@ def F2_dfcmpge : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.ge($Rss32,$Rtt32)",
tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@ -4380,7 +4380,7 @@ def F2_dfcmpgt : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.gt($Rss32,$Rtt32)",
tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@ -4392,7 +4392,7 @@ def F2_dfcmpuo : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.uo($Rss32,$Rtt32)",
tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@ -4404,7 +4404,7 @@ def F2_dfimm_n : HInst<
(outs DoubleRegs:$Rdd32),
(ins u10_0Imm:$Ii),
"$Rdd32 = dfmake(#$Ii):neg",
tc_234a11a5, TypeALU64>, Enc_e6c957, Requires<[HasV5]> {
tc_234a11a5, TypeALU64>, Enc_e6c957 {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101100101;
let prefersSlot3 = 1;
@ -4413,7 +4413,7 @@ def F2_dfimm_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins u10_0Imm:$Ii),
"$Rdd32 = dfmake(#$Ii):pos",
tc_234a11a5, TypeALU64>, Enc_e6c957, Requires<[HasV5]> {
tc_234a11a5, TypeALU64>, Enc_e6c957 {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101100100;
let prefersSlot3 = 1;
@ -4422,7 +4422,7 @@ def F2_sfadd : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfadd($Rs32,$Rt32)",
tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
tc_6792d5ff, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011000;
@ -4436,7 +4436,7 @@ def F2_sfclass : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Pd4 = sfclass($Rs32,#$Ii)",
tc_7a830544, TypeS_2op>, Enc_83ee64, Requires<[HasV5]> {
tc_7a830544, TypeS_2op>, Enc_83ee64 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000101111;
@ -4447,7 +4447,7 @@ def F2_sfcmpeq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.eq($Rs32,$Rt32)",
tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
tc_1e856f58, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@ -4459,7 +4459,7 @@ def F2_sfcmpge : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.ge($Rs32,$Rt32)",
tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
tc_1e856f58, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@ -4471,7 +4471,7 @@ def F2_sfcmpgt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.gt($Rs32,$Rt32)",
tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
tc_1e856f58, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@ -4483,7 +4483,7 @@ def F2_sfcmpuo : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.uo($Rs32,$Rt32)",
tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
tc_1e856f58, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@ -4495,7 +4495,7 @@ def F2_sffixupd : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sffixupd($Rs32,$Rt32)",
tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
tc_6792d5ff, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011110;
@ -4507,7 +4507,7 @@ def F2_sffixupn : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sffixupn($Rs32,$Rt32)",
tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
tc_6792d5ff, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011110;
@ -4519,7 +4519,7 @@ def F2_sffixupr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = sffixupr($Rs32)",
tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
tc_f3eaa14b, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011101;
let hasNewValue = 1;
@ -4530,7 +4530,7 @@ def F2_sffma : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += sfmpy($Rs32,$Rt32)",
tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
tc_d580173f, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@ -4544,7 +4544,7 @@ def F2_sffma_lib : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += sfmpy($Rs32,$Rt32):lib",
tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
tc_d580173f, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@ -4558,7 +4558,7 @@ def F2_sffma_sc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32, PredRegs:$Pu4),
"$Rx32 += sfmpy($Rs32,$Rt32,$Pu4):scale",
tc_038a1342, TypeM>, Enc_437f33, Requires<[HasV5]> {
tc_038a1342, TypeM>, Enc_437f33 {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111011;
@ -4572,7 +4572,7 @@ def F2_sffms : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= sfmpy($Rs32,$Rt32)",
tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
tc_d580173f, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@ -4586,7 +4586,7 @@ def F2_sffms_lib : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= sfmpy($Rs32,$Rt32):lib",
tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
tc_d580173f, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@ -4600,7 +4600,7 @@ def F2_sfimm_n : HInst<
(outs IntRegs:$Rd32),
(ins u10_0Imm:$Ii),
"$Rd32 = sfmake(#$Ii):neg",
tc_234a11a5, TypeALU64>, Enc_6c9440, Requires<[HasV5]> {
tc_234a11a5, TypeALU64>, Enc_6c9440 {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101011001;
let hasNewValue = 1;
@ -4611,7 +4611,7 @@ def F2_sfimm_p : HInst<
(outs IntRegs:$Rd32),
(ins u10_0Imm:$Ii),
"$Rd32 = sfmake(#$Ii):pos",
tc_234a11a5, TypeALU64>, Enc_6c9440, Requires<[HasV5]> {
tc_234a11a5, TypeALU64>, Enc_6c9440 {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101011000;
let hasNewValue = 1;
@ -4622,7 +4622,7 @@ def F2_sfinvsqrta : HInst<
(outs IntRegs:$Rd32, PredRegs:$Pe4),
(ins IntRegs:$Rs32),
"$Rd32,$Pe4 = sfinvsqrta($Rs32)",
tc_4d99bca9, TypeS_2op>, Enc_890909, Requires<[HasV5]> {
tc_4d99bca9, TypeS_2op>, Enc_890909 {
let Inst{13-7} = 0b0000000;
let Inst{31-21} = 0b10001011111;
let hasNewValue = 1;
@ -4634,7 +4634,7 @@ def F2_sfmax : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfmax($Rs32,$Rt32)",
tc_976ddc4f, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
tc_976ddc4f, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011100;
@ -4648,7 +4648,7 @@ def F2_sfmin : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfmin($Rs32,$Rt32)",
tc_976ddc4f, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
tc_976ddc4f, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011100;
@ -4662,7 +4662,7 @@ def F2_sfmpy : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfmpy($Rs32,$Rt32)",
tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
tc_6792d5ff, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011010;
@ -4676,7 +4676,7 @@ def F2_sfrecipa : HInst<
(outs IntRegs:$Rd32, PredRegs:$Pe4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32,$Pe4 = sfrecipa($Rs32,$Rt32)",
tc_9c00ce8d, TypeM>, Enc_a94f3b, Requires<[HasV5]> {
tc_9c00ce8d, TypeM>, Enc_a94f3b {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011111;
@ -4689,7 +4689,7 @@ def F2_sfsub : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfsub($Rs32,$Rt32)",
tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
tc_6792d5ff, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011000;
@ -16981,7 +16981,7 @@ def M4_cmpyi_whc : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = cmpyiwh($Rss32,$Rt32*):<<1:rnd:sat",
tc_8fd5f294, TypeS_3op>, Enc_3d5b28, Requires<[HasV5]> {
tc_8fd5f294, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@ -17007,7 +17007,7 @@ def M4_cmpyr_whc : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = cmpyrwh($Rss32,$Rt32*):<<1:rnd:sat",
tc_8fd5f294, TypeS_3op>, Enc_3d5b28, Requires<[HasV5]> {
tc_8fd5f294, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@ -17360,7 +17360,7 @@ def M5_vdmacbsu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vdmpybsu($Rss32,$Rtt32):sat",
tc_e913dc32, TypeM>, Enc_88c16c, Requires<[HasV5]> {
tc_e913dc32, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@ -17372,7 +17372,7 @@ def M5_vdmpybsu : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vdmpybsu($Rss32,$Rtt32):sat",
tc_8fd5f294, TypeM>, Enc_a56825, Requires<[HasV5]> {
tc_8fd5f294, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@ -18207,7 +18207,7 @@ def S2_asr_i_p_rnd : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = asr($Rss32,#$Ii):rnd",
tc_2b6f77c6, TypeS_2op>, Enc_5eac98, Requires<[HasV5]> {
tc_2b6f77c6, TypeS_2op>, Enc_5eac98 {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b10000000110;
let prefersSlot3 = 1;
@ -18216,7 +18216,7 @@ def S2_asr_i_p_rnd_goodsyntax : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = asrrnd($Rss32,#$Ii)",
tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> {
tc_2b6f77c6, TypeS_2op> {
let isPseudo = 1;
}
def S2_asr_i_r : HInst<
@ -25151,7 +25151,7 @@ def S5_asrhub_rnd_sat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rd32 = vasrhub($Rss32,#$Ii):raw",
tc_2b6f77c6, TypeS_2op>, Enc_11a146, Requires<[HasV5]> {
tc_2b6f77c6, TypeS_2op>, Enc_11a146 {
let Inst{7-5} = 0b100;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10001000011;
@ -25164,7 +25164,7 @@ def S5_asrhub_rnd_sat_goodsyntax : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rd32 = vasrhub($Rss32,#$Ii):rnd:sat",
tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> {
tc_2b6f77c6, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@ -25173,7 +25173,7 @@ def S5_asrhub_sat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rd32 = vasrhub($Rss32,#$Ii):sat",
tc_2b6f77c6, TypeS_2op>, Enc_11a146, Requires<[HasV5]> {
tc_2b6f77c6, TypeS_2op>, Enc_11a146 {
let Inst{7-5} = 0b101;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10001000011;
@ -25186,7 +25186,7 @@ def S5_popcountp : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = popcount($Rss32)",
tc_00afc57e, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
tc_00afc57e, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000011;
let Inst{31-21} = 0b10001000011;
let hasNewValue = 1;
@ -25197,7 +25197,7 @@ def S5_vasrhrnd : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vasrh($Rss32,#$Ii):raw",
tc_2b6f77c6, TypeS_2op>, Enc_12b6e9, Requires<[HasV5]> {
tc_2b6f77c6, TypeS_2op>, Enc_12b6e9 {
let Inst{7-5} = 0b000;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10000000001;
@ -25207,7 +25207,7 @@ def S5_vasrhrnd_goodsyntax : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vasrh($Rss32,#$Ii):rnd",
tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> {
tc_2b6f77c6, TypeS_2op> {
let isPseudo = 1;
}
def S6_allocframe_to_raw : HInst<
@ -37007,7 +37007,7 @@ def Y5_l2fetch : HInst<
(outs),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"l2fetch($Rs32,$Rtt32)",
tc_daa058fa, TypeST>, Enc_e6abcf, Requires<[HasV5]> {
tc_daa058fa, TypeST>, Enc_e6abcf {
let Inst{7-0} = 0b00000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10100110100;

View File

@ -1228,7 +1228,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
const HexagonSubtarget &ST)
: TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
Subtarget(ST) {
bool IsV4 = !Subtarget.hasV5Ops();
auto &HRI = *Subtarget.getRegisterInfo();
setPrefLoopAlignment(4);
@ -1270,10 +1269,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);
if (Subtarget.hasV5Ops()) {
addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
}
addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
//
// Handling of scalar operations.
@ -1351,8 +1348,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTTZ, MVT::i8, Promote);
setOperationAction(ISD::CTTZ, MVT::i16, Promote);
// In V5, popcount can count # of 1s in i64 but returns i32.
// On V4 it will be expanded (set later).
// Popcount can count # of 1s in i64 but returns i32.
setOperationAction(ISD::CTPOP, MVT::i8, Promote);
setOperationAction(ISD::CTPOP, MVT::i16, Promote);
setOperationAction(ISD::CTPOP, MVT::i32, Promote);
@ -1515,57 +1511,28 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ROTL, MVT::i32, Custom);
setOperationAction(ISD::ROTL, MVT::i64, Custom);
}
if (Subtarget.hasV5Ops()) {
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FADD, MVT::f64, Expand);
setOperationAction(ISD::FSUB, MVT::f64, Expand);
setOperationAction(ISD::FMUL, MVT::f64, Expand);
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
// V5+.
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FADD, MVT::f64, Expand);
setOperationAction(ISD::FSUB, MVT::f64, Expand);
setOperationAction(ISD::FMUL, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
} else { // V4
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
setOperationAction(ISD::CTPOP, MVT::i8, Expand);
setOperationAction(ISD::CTPOP, MVT::i16, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTPOP, MVT::i64, Expand);
// Expand these operations for both f32 and f64:
for (unsigned FPExpOpV4 :
{ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FABS, ISD::FNEG, ISD::FMA}) {
setOperationAction(FPExpOpV4, MVT::f32, Expand);
setOperationAction(FPExpOpV4, MVT::f64, Expand);
}
for (ISD::CondCode FPExpCCV4 :
{ISD::SETOEQ, ISD::SETOGT, ISD::SETOLT, ISD::SETOGE, ISD::SETOLE,
ISD::SETUO, ISD::SETO}) {
setCondCodeAction(FPExpCCV4, MVT::f32, Expand);
setCondCodeAction(FPExpCCV4, MVT::f64, Expand);
}
}
setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
// Handling of indexed loads/stores: default is "expand".
//
@ -1601,42 +1568,18 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
if (IsV4) {
// Handle single-precision floating point operations on V4.
if (FastMath) {
setLibcallName(RTLIB::ADD_F32, "__hexagon_fast_addsf3");
setLibcallName(RTLIB::SUB_F32, "__hexagon_fast_subsf3");
setLibcallName(RTLIB::MUL_F32, "__hexagon_fast_mulsf3");
setLibcallName(RTLIB::OGT_F32, "__hexagon_fast_gtsf2");
setLibcallName(RTLIB::OLT_F32, "__hexagon_fast_ltsf2");
// Double-precision compares.
setLibcallName(RTLIB::OGT_F64, "__hexagon_fast_gtdf2");
setLibcallName(RTLIB::OLT_F64, "__hexagon_fast_ltdf2");
} else {
setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
// Double-precision compares.
setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
}
}
// This is the only fast library function for sqrtd.
if (FastMath)
setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2");
// Prefix is: nothing for "slow-math",
// "fast2_" for V4 fast-math and V5+ fast-math double-precision
// "fast2_" for V5+ fast-math double-precision
// (actually, keep fast-math and fast-math2 separate for now)
if (FastMath) {
setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3");
setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3");
setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3");
setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3");
// Calling __hexagon_fast2_divsf3 with fast-math on V5 (ok).
setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3");
} else {
setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
@ -1646,44 +1589,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
}
if (Subtarget.hasV5Ops()) {
if (FastMath)
setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
else
setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
} else {
// V4
setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
}
if (FastMath)
setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
else
setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
// These cause problems when the shift amount is non-constant.
setLibcallName(RTLIB::SHL_I128, nullptr);
@ -3007,7 +2916,7 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
// Neither Imm nor VT is consulted: the answer does not depend on the
// particular immediate or on the value type.
// NOTE(review): two return statements follow. As written, the first one
// decides the result and `return true;` can never be reached. This looks
// like the before/after pair of a change that drops the V5 subtarget
// check -- confirm which single line is meant to remain.
return Subtarget.hasV5Ops();
return true;
}
/// isLegalAddressingMode - Return true if the addressing mode represented by

View File

@ -194,8 +194,6 @@ class HInst<dag outs, dag ins, string asmstr, InstrItinClass itin, IType type> :
// Instruction Classes Definitions +
//===----------------------------------------------------------------------===//
// LD Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
let mayLoad = 1 in
class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
@ -205,9 +203,6 @@ class CONSTLDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
// Definition of the instruction class CHANGED from V2/V3 to V4.
let mayStore = 1 in
class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01>
@ -235,15 +230,6 @@ class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Instruction Classes Definitions -
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// V4 Instruction Format Definitions +
//===----------------------------------------------------------------------===//
include "HexagonInstrFormatsV4.td"
//===----------------------------------------------------------------------===//
// V60+ Instruction Format Definitions +
//===----------------------------------------------------------------------===//
include "HexagonInstrFormatsV5.td"
include "HexagonInstrFormatsV60.td"
include "HexagonInstrFormatsV65.td"

View File

@ -1,4 +1,4 @@
//==- HexagonInstrFormatsV4.td - Hexagon Instruction Formats --*- tablegen -==//
//==- HexagonInstrFormatsV5.td - Hexagon Instruction Formats --*- tablegen -==//
//
// The LLVM Compiler Infrastructure
//
@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
// This file describes the Hexagon V4 instruction classes in TableGen format.
// This file describes the Hexagon V5 instruction classes in TableGen format.
//
//===----------------------------------------------------------------------===//

View File

@ -1398,7 +1398,5 @@ def: T_R_pat<Y2_dczeroa, int_hexagon_Y2_dczeroa>;
def: T_RR_pat<Y4_l2fetch, int_hexagon_Y4_l2fetch>;
def: T_RP_pat<Y5_l2fetch, int_hexagon_Y5_l2fetch>;
include "HexagonIntrinsicsV3.td"
include "HexagonIntrinsicsV4.td"
include "HexagonIntrinsicsV5.td"
include "HexagonIntrinsicsV60.td"

View File

@ -1,27 +0,0 @@
//=- HexagonIntrinsicsV3.td - Target Description for Hexagon -*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the Hexagon V3 Compiler Intrinsics in TableGen format.
//
//===----------------------------------------------------------------------===//
// Vector reduce complex multiply real or imaginary
def : T_PR_pat <M2_vrcmpys_s1, int_hexagon_M2_vrcmpys_s1>;
def : T_PPR_pat<M2_vrcmpys_acc_s1, int_hexagon_M2_vrcmpys_acc_s1>;
def : T_PR_pat <M2_vrcmpys_s1rp, int_hexagon_M2_vrcmpys_s1rp>;
// Vector reduce add unsigned halfwords
def : T_PP_pat<M2_vradduh, int_hexagon_M2_vradduh>;
def: T_RP_pat<A2_addsp, int_hexagon_A2_addsp>;
def: T_PP_pat<A2_addpsat, int_hexagon_A2_addpsat>;
def: T_PP_pat<A2_minp, int_hexagon_A2_minp>;
def: T_PP_pat<A2_minup, int_hexagon_A2_minup>;
def: T_PP_pat<A2_maxp, int_hexagon_A2_maxp>;
def: T_PP_pat<A2_maxup, int_hexagon_A2_maxup>;

View File

@ -1,305 +0,0 @@
//===- HexagonIntrinsicsV4.td - V4 Instruction intrinsics --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// This is populated based on the following specs:
// Hexagon V4 Architecture Extensions
// Application-Level Specification
// 80-V9418-12 Rev. A
// June 15, 2010
// Vector reduce multiply word by signed half (32x16)
//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
def : T_PP_pat <M4_vrmpyeh_s0, int_hexagon_M4_vrmpyeh_s0>;
def : T_PP_pat <M4_vrmpyeh_s1, int_hexagon_M4_vrmpyeh_s1>;
//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
def : T_PP_pat <M4_vrmpyoh_s0, int_hexagon_M4_vrmpyoh_s0>;
def : T_PP_pat <M4_vrmpyoh_s1, int_hexagon_M4_vrmpyoh_s1>;
//Rdd+=vrmpyweh(Rss,Rtt)[:<<1]
def : T_PPP_pat <M4_vrmpyeh_acc_s0, int_hexagon_M4_vrmpyeh_acc_s0>;
def : T_PPP_pat <M4_vrmpyeh_acc_s1, int_hexagon_M4_vrmpyeh_acc_s1>;
//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
def : T_PPP_pat <M4_vrmpyoh_acc_s0, int_hexagon_M4_vrmpyoh_acc_s0>;
def : T_PPP_pat <M4_vrmpyoh_acc_s1, int_hexagon_M4_vrmpyoh_acc_s1>;
// Vector multiply halfwords, signed by unsigned
// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
def : T_RR_pat <M2_vmpy2su_s0, int_hexagon_M2_vmpy2su_s0>;
def : T_RR_pat <M2_vmpy2su_s1, int_hexagon_M2_vmpy2su_s1>;
// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
def : T_PRR_pat <M2_vmac2su_s0, int_hexagon_M2_vmac2su_s0>;
def : T_PRR_pat <M2_vmac2su_s1, int_hexagon_M2_vmac2su_s1>;
// Vector polynomial multiply halfwords
// Rdd=vpmpyh(Rs,Rt)
def : T_RR_pat <M4_vpmpyh, int_hexagon_M4_vpmpyh>;
// Rxx[^]=vpmpyh(Rs,Rt)
def : T_PRR_pat <M4_vpmpyh_acc, int_hexagon_M4_vpmpyh_acc>;
// Polynomial multiply words
// Rdd=pmpyw(Rs,Rt)
def : T_RR_pat <M4_pmpyw, int_hexagon_M4_pmpyw>;
// Rxx^=pmpyw(Rs,Rt)
def : T_PRR_pat <M4_pmpyw_acc, int_hexagon_M4_pmpyw_acc>;
//Rxx^=asr(Rss,Rt)
def : T_PPR_pat <S2_asr_r_p_xor, int_hexagon_S2_asr_r_p_xor>;
//Rxx^=asl(Rss,Rt)
def : T_PPR_pat <S2_asl_r_p_xor, int_hexagon_S2_asl_r_p_xor>;
//Rxx^=lsr(Rss,Rt)
def : T_PPR_pat <S2_lsr_r_p_xor, int_hexagon_S2_lsr_r_p_xor>;
//Rxx^=lsl(Rss,Rt)
def : T_PPR_pat <S2_lsl_r_p_xor, int_hexagon_S2_lsl_r_p_xor>;
// Multiply and use upper result
def : T_RR_pat <M2_mpysu_up, int_hexagon_M2_mpysu_up>;
def : T_RR_pat <M2_mpy_up_s1, int_hexagon_M2_mpy_up_s1>;
def : T_RR_pat <M2_hmmpyh_s1, int_hexagon_M2_hmmpyh_s1>;
def : T_RR_pat <M2_hmmpyl_s1, int_hexagon_M2_hmmpyl_s1>;
def : T_RR_pat <M2_mpy_up_s1_sat, int_hexagon_M2_mpy_up_s1_sat>;
def : T_PP_pat <A2_vaddub, int_hexagon_A2_vaddb_map>;
def : T_PP_pat <A2_vsubub, int_hexagon_A2_vsubb_map>;
// Vector reduce add unsigned halfwords
def : T_PP_pat <M2_vraddh, int_hexagon_M2_vraddh>;
def: T_P_pat<S2_brevp, int_hexagon_S2_brevp>;
def: T_P_pat<S2_ct0p, int_hexagon_S2_ct0p>;
def: T_P_pat<S2_ct1p, int_hexagon_S2_ct1p>;
def: T_Q_RR_pat<C4_nbitsset, int_hexagon_C4_nbitsset>;
def: T_Q_RR_pat<C4_nbitsclr, int_hexagon_C4_nbitsclr>;
def: T_Q_RI_pat<C4_nbitsclri, int_hexagon_C4_nbitsclri>;
def : T_Q_PI_pat<A4_vcmpbeqi, int_hexagon_A4_vcmpbeqi>;
def : T_Q_PI_pat<A4_vcmpbgti, int_hexagon_A4_vcmpbgti>;
def : T_Q_PI_pat<A4_vcmpbgtui, int_hexagon_A4_vcmpbgtui>;
def : T_Q_PI_pat<A4_vcmpheqi, int_hexagon_A4_vcmpheqi>;
def : T_Q_PI_pat<A4_vcmphgti, int_hexagon_A4_vcmphgti>;
def : T_Q_PI_pat<A4_vcmphgtui, int_hexagon_A4_vcmphgtui>;
def : T_Q_PI_pat<A4_vcmpweqi, int_hexagon_A4_vcmpweqi>;
def : T_Q_PI_pat<A4_vcmpwgti, int_hexagon_A4_vcmpwgti>;
def : T_Q_PI_pat<A4_vcmpwgtui, int_hexagon_A4_vcmpwgtui>;
def : T_Q_PP_pat<A4_vcmpbeq_any, int_hexagon_A4_vcmpbeq_any>;
def : T_Q_RR_pat<A4_cmpbeq, int_hexagon_A4_cmpbeq>;
def : T_Q_RR_pat<A4_cmpbgt, int_hexagon_A4_cmpbgt>;
def : T_Q_RR_pat<A4_cmpbgtu, int_hexagon_A4_cmpbgtu>;
def : T_Q_RR_pat<A4_cmpheq, int_hexagon_A4_cmpheq>;
def : T_Q_RR_pat<A4_cmphgt, int_hexagon_A4_cmphgt>;
def : T_Q_RR_pat<A4_cmphgtu, int_hexagon_A4_cmphgtu>;
def : T_Q_RI_pat<A4_cmpbeqi, int_hexagon_A4_cmpbeqi>;
def : T_Q_RI_pat<A4_cmpbgti, int_hexagon_A4_cmpbgti>;
def : T_Q_RI_pat<A4_cmpbgtui, int_hexagon_A4_cmpbgtui>;
def : T_Q_RI_pat<A4_cmpheqi, int_hexagon_A4_cmpheqi>;
def : T_Q_RI_pat<A4_cmphgti, int_hexagon_A4_cmphgti>;
def : T_Q_RI_pat<A4_cmphgtui, int_hexagon_A4_cmphgtui>;
def : T_Q_RP_pat<A4_boundscheck, int_hexagon_A4_boundscheck>;
def : T_Q_PR_pat<A4_tlbmatch, int_hexagon_A4_tlbmatch>;
def : T_RRR_pat <M4_mpyrr_addr, int_hexagon_M4_mpyrr_addr>;
def : T_IRR_pat <M4_mpyrr_addi, int_hexagon_M4_mpyrr_addi>;
def : T_IRI_pat <M4_mpyri_addi, int_hexagon_M4_mpyri_addi>;
def : T_RIR_pat <M4_mpyri_addr_u2, int_hexagon_M4_mpyri_addr_u2>;
def : T_RRI_pat <M4_mpyri_addr, int_hexagon_M4_mpyri_addr>;
def : T_RRR_pat <M4_mac_up_s1_sat, int_hexagon_M4_mac_up_s1_sat>;
def : T_RRR_pat <M4_nac_up_s1_sat, int_hexagon_M4_nac_up_s1_sat>;
// Complex multiply 32x16
def : T_PR_pat <M4_cmpyi_wh, int_hexagon_M4_cmpyi_wh>;
def : T_PR_pat <M4_cmpyr_wh, int_hexagon_M4_cmpyr_wh>;
def : T_PR_pat <M4_cmpyi_whc, int_hexagon_M4_cmpyi_whc>;
def : T_PR_pat <M4_cmpyr_whc, int_hexagon_M4_cmpyr_whc>;
def : T_PP_pat<A4_andnp, int_hexagon_A4_andnp>;
def : T_PP_pat<A4_ornp, int_hexagon_A4_ornp>;
// Complex add/sub halfwords/words
def : T_PP_pat <S4_vxaddsubw, int_hexagon_S4_vxaddsubw>;
def : T_PP_pat <S4_vxsubaddw, int_hexagon_S4_vxsubaddw>;
def : T_PP_pat <S4_vxaddsubh, int_hexagon_S4_vxaddsubh>;
def : T_PP_pat <S4_vxsubaddh, int_hexagon_S4_vxsubaddh>;
def : T_PP_pat <S4_vxaddsubhr, int_hexagon_S4_vxaddsubhr>;
def : T_PP_pat <S4_vxsubaddhr, int_hexagon_S4_vxsubaddhr>;
// Extract bitfield
def : T_PP_pat <S4_extractp_rp, int_hexagon_S4_extractp_rp>;
def : T_RP_pat <S4_extract_rp, int_hexagon_S4_extract_rp>;
def : T_PII_pat <S4_extractp, int_hexagon_S4_extractp>;
def : T_RII_pat <S4_extract, int_hexagon_S4_extract>;
// Vector conditional negate
// Rdd=vcnegh(Rss,Rt)
def : T_PR_pat <S2_vcnegh, int_hexagon_S2_vcnegh>;
// Shift an immediate left by register amount
def : T_IR_pat<S4_lsli, int_hexagon_S4_lsli>;
// Vector reduce maximum halfwords
def : T_PPR_pat <A4_vrmaxh, int_hexagon_A4_vrmaxh>;
def : T_PPR_pat <A4_vrmaxuh, int_hexagon_A4_vrmaxuh>;
// Vector reduce maximum words
def : T_PPR_pat <A4_vrmaxw, int_hexagon_A4_vrmaxw>;
def : T_PPR_pat <A4_vrmaxuw, int_hexagon_A4_vrmaxuw>;
// Vector reduce minimum halfwords
def : T_PPR_pat <A4_vrminh, int_hexagon_A4_vrminh>;
def : T_PPR_pat <A4_vrminuh, int_hexagon_A4_vrminuh>;
// Vector reduce minimum words
def : T_PPR_pat <A4_vrminw, int_hexagon_A4_vrminw>;
def : T_PPR_pat <A4_vrminuw, int_hexagon_A4_vrminuw>;
// Rotate and reduce bytes
def : Pat <(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2,
u2_0ImmPred:$src3),
(S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3)>;
// Rotate and reduce bytes with accumulation
// Rxx+=vrcrotate(Rss,Rt,#u2)
def : Pat <(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
IntRegs:$src3, u2_0ImmPred:$src4),
(S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
IntRegs:$src3, u2_0ImmPred:$src4)>;
// Vector conditional negate
def : T_PPR_pat<S2_vrcnegh, int_hexagon_S2_vrcnegh>;
// Logical xor with xor accumulation
def : T_PPP_pat<M4_xor_xacc, int_hexagon_M4_xor_xacc>;
// ALU64 - Vector min/max byte
def : T_PP_pat <A2_vminb, int_hexagon_A2_vminb>;
def : T_PP_pat <A2_vmaxb, int_hexagon_A2_vmaxb>;
// Shift and add/sub/and/or
def : T_IRI_pat <S4_andi_asl_ri, int_hexagon_S4_andi_asl_ri>;
def : T_IRI_pat <S4_ori_asl_ri, int_hexagon_S4_ori_asl_ri>;
def : T_IRI_pat <S4_addi_asl_ri, int_hexagon_S4_addi_asl_ri>;
def : T_IRI_pat <S4_subi_asl_ri, int_hexagon_S4_subi_asl_ri>;
def : T_IRI_pat <S4_andi_lsr_ri, int_hexagon_S4_andi_lsr_ri>;
def : T_IRI_pat <S4_ori_lsr_ri, int_hexagon_S4_ori_lsr_ri>;
def : T_IRI_pat <S4_addi_lsr_ri, int_hexagon_S4_addi_lsr_ri>;
def : T_IRI_pat <S4_subi_lsr_ri, int_hexagon_S4_subi_lsr_ri>;
// Split bitfield
def : T_RI_pat <A4_bitspliti, int_hexagon_A4_bitspliti>;
def : T_RR_pat <A4_bitsplit, int_hexagon_A4_bitsplit>;
def: T_RR_pat<S4_parity, int_hexagon_S4_parity>;
def: T_Q_RI_pat<S4_ntstbit_i, int_hexagon_S4_ntstbit_i>;
def: T_Q_RR_pat<S4_ntstbit_r, int_hexagon_S4_ntstbit_r>;
def: T_RI_pat<S4_clbaddi, int_hexagon_S4_clbaddi>;
def: T_PI_pat<S4_clbpaddi, int_hexagon_S4_clbpaddi>;
def: T_P_pat <S4_clbpnorm, int_hexagon_S4_clbpnorm>;
//*******************************************************************
// ALU32/ALU
//*******************************************************************
// ALU32 / ALU / Logical Operations.
def: T_RR_pat<A4_andn, int_hexagon_A4_andn>;
def: T_RR_pat<A4_orn, int_hexagon_A4_orn>;
//*******************************************************************
// ALU32/PERM
//*******************************************************************
// Combine Words Into Doublewords.
def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32_0ImmPred>;
def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32_0ImmPred>;
//*******************************************************************
// ALU32/PRED
//*******************************************************************
// Compare
def : T_Q_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32_0ImmPred>;
def : T_Q_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32_0ImmPred>;
def : T_Q_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32_0ImmPred>;
// Compare To General Register.
def: T_Q_RR_pat<C4_cmpneq, int_hexagon_C4_cmpneq>;
def: T_Q_RR_pat<C4_cmplte, int_hexagon_C4_cmplte>;
def: T_Q_RR_pat<C4_cmplteu, int_hexagon_C4_cmplteu>;
def: T_RR_pat<A4_rcmpeq, int_hexagon_A4_rcmpeq>;
def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>;
def: T_RI_pat<A4_rcmpeqi, int_hexagon_A4_rcmpeqi>;
def: T_RI_pat<A4_rcmpneqi, int_hexagon_A4_rcmpneqi>;
//*******************************************************************
// CR
//*******************************************************************
// CR / Logical Operations On Predicates.
def: T_Q_QQQ_pat<C4_and_and, int_hexagon_C4_and_and>;
def: T_Q_QQQ_pat<C4_and_andn, int_hexagon_C4_and_andn>;
def: T_Q_QQQ_pat<C4_and_or, int_hexagon_C4_and_or>;
def: T_Q_QQQ_pat<C4_and_orn, int_hexagon_C4_and_orn>;
def: T_Q_QQQ_pat<C4_or_and, int_hexagon_C4_or_and>;
def: T_Q_QQQ_pat<C4_or_andn, int_hexagon_C4_or_andn>;
def: T_Q_QQQ_pat<C4_or_or, int_hexagon_C4_or_or>;
def: T_Q_QQQ_pat<C4_or_orn, int_hexagon_C4_or_orn>;
//*******************************************************************
// XTYPE/ALU
//*******************************************************************
// Add And Accumulate.
def : T_RRI_pat <S4_addaddi, int_hexagon_S4_addaddi>;
def : T_RIR_pat <S4_subaddi, int_hexagon_S4_subaddi>;
// XTYPE / ALU / Logical-logical Words.
def : T_RRR_pat <M4_or_xor, int_hexagon_M4_or_xor>;
def : T_RRR_pat <M4_and_xor, int_hexagon_M4_and_xor>;
def : T_RRR_pat <M4_or_and, int_hexagon_M4_or_and>;
def : T_RRR_pat <M4_and_and, int_hexagon_M4_and_and>;
def : T_RRR_pat <M4_xor_and, int_hexagon_M4_xor_and>;
def : T_RRR_pat <M4_or_or, int_hexagon_M4_or_or>;
def : T_RRR_pat <M4_and_or, int_hexagon_M4_and_or>;
def : T_RRR_pat <M4_xor_or, int_hexagon_M4_xor_or>;
def : T_RRR_pat <M4_or_andn, int_hexagon_M4_or_andn>;
def : T_RRR_pat <M4_and_andn, int_hexagon_M4_and_andn>;
def : T_RRR_pat <M4_xor_andn, int_hexagon_M4_xor_andn>;
def : T_RRI_pat <S4_or_andi, int_hexagon_S4_or_andi>;
def : T_RRI_pat <S4_or_andix, int_hexagon_S4_or_andix>;
def : T_RRI_pat <S4_or_ori, int_hexagon_S4_or_ori>;
// Modulo wrap.
def : T_RR_pat <A4_modwrapu, int_hexagon_A4_modwrapu>;
// Arithmetic/Convergent round
// Rd=[cround|round](Rs,Rt)[:sat]
// Rd=[cround|round](Rs,#u5)[:sat]
def : T_RI_pat <A4_cround_ri, int_hexagon_A4_cround_ri>;
def : T_RR_pat <A4_cround_rr, int_hexagon_A4_cround_rr>;
def : T_RI_pat <A4_round_ri, int_hexagon_A4_round_ri>;
def : T_RR_pat <A4_round_rr, int_hexagon_A4_round_rr>;
def : T_RI_pat <A4_round_ri_sat, int_hexagon_A4_round_ri_sat>;
def : T_RR_pat <A4_round_rr_sat, int_hexagon_A4_round_rr_sat>;
def : T_P_pat <A2_roundsat, int_hexagon_A2_roundsat>;

View File

@ -7,9 +7,314 @@
//
//===----------------------------------------------------------------------===//
def : T_PR_pat <M2_vrcmpys_s1, int_hexagon_M2_vrcmpys_s1>;
def : T_PPR_pat<M2_vrcmpys_acc_s1, int_hexagon_M2_vrcmpys_acc_s1>;
def : T_PR_pat <M2_vrcmpys_s1rp, int_hexagon_M2_vrcmpys_s1rp>;
// Vector reduce add unsigned halfwords
def : T_PP_pat<M2_vradduh, int_hexagon_M2_vradduh>;
def: T_RP_pat<A2_addsp, int_hexagon_A2_addsp>;
def: T_PP_pat<A2_addpsat, int_hexagon_A2_addpsat>;
def: T_PP_pat<A2_minp, int_hexagon_A2_minp>;
def: T_PP_pat<A2_minup, int_hexagon_A2_minup>;
def: T_PP_pat<A2_maxp, int_hexagon_A2_maxp>;
def: T_PP_pat<A2_maxup, int_hexagon_A2_maxup>;
// Vector reduce multiply word by signed half (32x16)
//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
def : T_PP_pat <M4_vrmpyeh_s0, int_hexagon_M4_vrmpyeh_s0>;
def : T_PP_pat <M4_vrmpyeh_s1, int_hexagon_M4_vrmpyeh_s1>;
//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
def : T_PP_pat <M4_vrmpyoh_s0, int_hexagon_M4_vrmpyoh_s0>;
def : T_PP_pat <M4_vrmpyoh_s1, int_hexagon_M4_vrmpyoh_s1>;
//Rdd+=vrmpyweh(Rss,Rtt)[:<<1]
def : T_PPP_pat <M4_vrmpyeh_acc_s0, int_hexagon_M4_vrmpyeh_acc_s0>;
def : T_PPP_pat <M4_vrmpyeh_acc_s1, int_hexagon_M4_vrmpyeh_acc_s1>;
//Rdd+=vrmpywoh(Rss,Rtt)[:<<1]
def : T_PPP_pat <M4_vrmpyoh_acc_s0, int_hexagon_M4_vrmpyoh_acc_s0>;
def : T_PPP_pat <M4_vrmpyoh_acc_s1, int_hexagon_M4_vrmpyoh_acc_s1>;
// Vector multiply halfwords, signed by unsigned
// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
def : T_RR_pat <M2_vmpy2su_s0, int_hexagon_M2_vmpy2su_s0>;
def : T_RR_pat <M2_vmpy2su_s1, int_hexagon_M2_vmpy2su_s1>;
// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
def : T_PRR_pat <M2_vmac2su_s0, int_hexagon_M2_vmac2su_s0>;
def : T_PRR_pat <M2_vmac2su_s1, int_hexagon_M2_vmac2su_s1>;
// Vector polynomial multiply halfwords
// Rdd=vpmpyh(Rs,Rt)
def : T_RR_pat <M4_vpmpyh, int_hexagon_M4_vpmpyh>;
// Rxx[^]=vpmpyh(Rs,Rt)
def : T_PRR_pat <M4_vpmpyh_acc, int_hexagon_M4_vpmpyh_acc>;
// Polynomial multiply words
// Rdd=pmpyw(Rs,Rt)
def : T_RR_pat <M4_pmpyw, int_hexagon_M4_pmpyw>;
// Rxx^=pmpyw(Rs,Rt)
def : T_PRR_pat <M4_pmpyw_acc, int_hexagon_M4_pmpyw_acc>;
//Rxx^=asr(Rss,Rt)
def : T_PPR_pat <S2_asr_r_p_xor, int_hexagon_S2_asr_r_p_xor>;
//Rxx^=asl(Rss,Rt)
def : T_PPR_pat <S2_asl_r_p_xor, int_hexagon_S2_asl_r_p_xor>;
//Rxx^=lsr(Rss,Rt)
def : T_PPR_pat <S2_lsr_r_p_xor, int_hexagon_S2_lsr_r_p_xor>;
//Rxx^=lsl(Rss,Rt)
def : T_PPR_pat <S2_lsl_r_p_xor, int_hexagon_S2_lsl_r_p_xor>;
// Multiply and use upper result
def : T_RR_pat <M2_mpysu_up, int_hexagon_M2_mpysu_up>;
def : T_RR_pat <M2_mpy_up_s1, int_hexagon_M2_mpy_up_s1>;
def : T_RR_pat <M2_hmmpyh_s1, int_hexagon_M2_hmmpyh_s1>;
def : T_RR_pat <M2_hmmpyl_s1, int_hexagon_M2_hmmpyl_s1>;
def : T_RR_pat <M2_mpy_up_s1_sat, int_hexagon_M2_mpy_up_s1_sat>;
def : T_PP_pat <A2_vaddub, int_hexagon_A2_vaddb_map>;
def : T_PP_pat <A2_vsubub, int_hexagon_A2_vsubb_map>;
// Vector reduce add halfwords
def : T_PP_pat <M2_vraddh, int_hexagon_M2_vraddh>;
def: T_P_pat<S2_brevp, int_hexagon_S2_brevp>;
def: T_P_pat<S2_ct0p, int_hexagon_S2_ct0p>;
def: T_P_pat<S2_ct1p, int_hexagon_S2_ct1p>;
def: T_Q_RR_pat<C4_nbitsset, int_hexagon_C4_nbitsset>;
def: T_Q_RR_pat<C4_nbitsclr, int_hexagon_C4_nbitsclr>;
def: T_Q_RI_pat<C4_nbitsclri, int_hexagon_C4_nbitsclri>;
def : T_Q_PI_pat<A4_vcmpbeqi, int_hexagon_A4_vcmpbeqi>;
def : T_Q_PI_pat<A4_vcmpbgti, int_hexagon_A4_vcmpbgti>;
def : T_Q_PI_pat<A4_vcmpbgtui, int_hexagon_A4_vcmpbgtui>;
def : T_Q_PI_pat<A4_vcmpheqi, int_hexagon_A4_vcmpheqi>;
def : T_Q_PI_pat<A4_vcmphgti, int_hexagon_A4_vcmphgti>;
def : T_Q_PI_pat<A4_vcmphgtui, int_hexagon_A4_vcmphgtui>;
def : T_Q_PI_pat<A4_vcmpweqi, int_hexagon_A4_vcmpweqi>;
def : T_Q_PI_pat<A4_vcmpwgti, int_hexagon_A4_vcmpwgti>;
def : T_Q_PI_pat<A4_vcmpwgtui, int_hexagon_A4_vcmpwgtui>;
def : T_Q_PP_pat<A4_vcmpbeq_any, int_hexagon_A4_vcmpbeq_any>;
def : T_Q_RR_pat<A4_cmpbeq, int_hexagon_A4_cmpbeq>;
def : T_Q_RR_pat<A4_cmpbgt, int_hexagon_A4_cmpbgt>;
def : T_Q_RR_pat<A4_cmpbgtu, int_hexagon_A4_cmpbgtu>;
def : T_Q_RR_pat<A4_cmpheq, int_hexagon_A4_cmpheq>;
def : T_Q_RR_pat<A4_cmphgt, int_hexagon_A4_cmphgt>;
def : T_Q_RR_pat<A4_cmphgtu, int_hexagon_A4_cmphgtu>;
def : T_Q_RI_pat<A4_cmpbeqi, int_hexagon_A4_cmpbeqi>;
def : T_Q_RI_pat<A4_cmpbgti, int_hexagon_A4_cmpbgti>;
def : T_Q_RI_pat<A4_cmpbgtui, int_hexagon_A4_cmpbgtui>;
def : T_Q_RI_pat<A4_cmpheqi, int_hexagon_A4_cmpheqi>;
def : T_Q_RI_pat<A4_cmphgti, int_hexagon_A4_cmphgti>;
def : T_Q_RI_pat<A4_cmphgtui, int_hexagon_A4_cmphgtui>;
def : T_Q_RP_pat<A4_boundscheck, int_hexagon_A4_boundscheck>;
def : T_Q_PR_pat<A4_tlbmatch, int_hexagon_A4_tlbmatch>;
def : T_RRR_pat <M4_mpyrr_addr, int_hexagon_M4_mpyrr_addr>;
def : T_IRR_pat <M4_mpyrr_addi, int_hexagon_M4_mpyrr_addi>;
def : T_IRI_pat <M4_mpyri_addi, int_hexagon_M4_mpyri_addi>;
def : T_RIR_pat <M4_mpyri_addr_u2, int_hexagon_M4_mpyri_addr_u2>;
def : T_RRI_pat <M4_mpyri_addr, int_hexagon_M4_mpyri_addr>;
def : T_RRR_pat <M4_mac_up_s1_sat, int_hexagon_M4_mac_up_s1_sat>;
def : T_RRR_pat <M4_nac_up_s1_sat, int_hexagon_M4_nac_up_s1_sat>;
// Complex multiply 32x16
def : T_PR_pat <M4_cmpyi_wh, int_hexagon_M4_cmpyi_wh>;
def : T_PR_pat <M4_cmpyr_wh, int_hexagon_M4_cmpyr_wh>;
def : T_PR_pat <M4_cmpyi_whc, int_hexagon_M4_cmpyi_whc>;
def : T_PR_pat <M4_cmpyr_whc, int_hexagon_M4_cmpyr_whc>;
def : T_PP_pat<A4_andnp, int_hexagon_A4_andnp>;
def : T_PP_pat<A4_ornp, int_hexagon_A4_ornp>;
// Complex add/sub halfwords/words
def : T_PP_pat <S4_vxaddsubw, int_hexagon_S4_vxaddsubw>;
def : T_PP_pat <S4_vxsubaddw, int_hexagon_S4_vxsubaddw>;
def : T_PP_pat <S4_vxaddsubh, int_hexagon_S4_vxaddsubh>;
def : T_PP_pat <S4_vxsubaddh, int_hexagon_S4_vxsubaddh>;
def : T_PP_pat <S4_vxaddsubhr, int_hexagon_S4_vxaddsubhr>;
def : T_PP_pat <S4_vxsubaddhr, int_hexagon_S4_vxsubaddhr>;
// Extract bitfield
def : T_PP_pat <S4_extractp_rp, int_hexagon_S4_extractp_rp>;
def : T_RP_pat <S4_extract_rp, int_hexagon_S4_extract_rp>;
def : T_PII_pat <S4_extractp, int_hexagon_S4_extractp>;
def : T_RII_pat <S4_extract, int_hexagon_S4_extract>;
// Vector conditional negate
// Rdd=vcnegh(Rss,Rt)
def : T_PR_pat <S2_vcnegh, int_hexagon_S2_vcnegh>;
// Shift an immediate left by register amount
def : T_IR_pat<S4_lsli, int_hexagon_S4_lsli>;
// Vector reduce maximum halfwords
def : T_PPR_pat <A4_vrmaxh, int_hexagon_A4_vrmaxh>;
def : T_PPR_pat <A4_vrmaxuh, int_hexagon_A4_vrmaxuh>;
// Vector reduce maximum words
def : T_PPR_pat <A4_vrmaxw, int_hexagon_A4_vrmaxw>;
def : T_PPR_pat <A4_vrmaxuw, int_hexagon_A4_vrmaxuw>;
// Vector reduce minimum halfwords
def : T_PPR_pat <A4_vrminh, int_hexagon_A4_vrminh>;
def : T_PPR_pat <A4_vrminuh, int_hexagon_A4_vrminuh>;
// Vector reduce minimum words
def : T_PPR_pat <A4_vrminw, int_hexagon_A4_vrminw>;
def : T_PPR_pat <A4_vrminuw, int_hexagon_A4_vrminuw>;
// Rotate and reduce bytes
def : Pat <(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2,
u2_0ImmPred:$src3),
(S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3)>;
// Rotate and reduce bytes with accumulation
// Rxx+=vrcrotate(Rss,Rt,#u2)
def : Pat <(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
IntRegs:$src3, u2_0ImmPred:$src4),
(S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
IntRegs:$src3, u2_0ImmPred:$src4)>;
// Vector reduce conditional negate
def : T_PPR_pat<S2_vrcnegh, int_hexagon_S2_vrcnegh>;
// Logical xor with xor accumulation
def : T_PPP_pat<M4_xor_xacc, int_hexagon_M4_xor_xacc>;
// ALU64 - Vector min/max byte
def : T_PP_pat <A2_vminb, int_hexagon_A2_vminb>;
def : T_PP_pat <A2_vmaxb, int_hexagon_A2_vmaxb>;
// Shift and add/sub/and/or
def : T_IRI_pat <S4_andi_asl_ri, int_hexagon_S4_andi_asl_ri>;
def : T_IRI_pat <S4_ori_asl_ri, int_hexagon_S4_ori_asl_ri>;
def : T_IRI_pat <S4_addi_asl_ri, int_hexagon_S4_addi_asl_ri>;
def : T_IRI_pat <S4_subi_asl_ri, int_hexagon_S4_subi_asl_ri>;
def : T_IRI_pat <S4_andi_lsr_ri, int_hexagon_S4_andi_lsr_ri>;
def : T_IRI_pat <S4_ori_lsr_ri, int_hexagon_S4_ori_lsr_ri>;
def : T_IRI_pat <S4_addi_lsr_ri, int_hexagon_S4_addi_lsr_ri>;
def : T_IRI_pat <S4_subi_lsr_ri, int_hexagon_S4_subi_lsr_ri>;
// Split bitfield
def : T_RI_pat <A4_bitspliti, int_hexagon_A4_bitspliti>;
def : T_RR_pat <A4_bitsplit, int_hexagon_A4_bitsplit>;
def: T_RR_pat<S4_parity, int_hexagon_S4_parity>;
def: T_Q_RI_pat<S4_ntstbit_i, int_hexagon_S4_ntstbit_i>;
def: T_Q_RR_pat<S4_ntstbit_r, int_hexagon_S4_ntstbit_r>;
def: T_RI_pat<S4_clbaddi, int_hexagon_S4_clbaddi>;
def: T_PI_pat<S4_clbpaddi, int_hexagon_S4_clbpaddi>;
def: T_P_pat <S4_clbpnorm, int_hexagon_S4_clbpnorm>;
//*******************************************************************
// ALU32/ALU
//*******************************************************************
// ALU32 / ALU / Logical Operations.
def: T_RR_pat<A4_andn, int_hexagon_A4_andn>;
def: T_RR_pat<A4_orn, int_hexagon_A4_orn>;
//*******************************************************************
// ALU32/PERM
//*******************************************************************
// Combine Words Into Doublewords.
def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32_0ImmPred>;
def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32_0ImmPred>;
//*******************************************************************
// ALU32/PRED
//*******************************************************************
// Compare
def : T_Q_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32_0ImmPred>;
def : T_Q_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32_0ImmPred>;
def : T_Q_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32_0ImmPred>;
// Compare To General Register.
def: T_Q_RR_pat<C4_cmpneq, int_hexagon_C4_cmpneq>;
def: T_Q_RR_pat<C4_cmplte, int_hexagon_C4_cmplte>;
def: T_Q_RR_pat<C4_cmplteu, int_hexagon_C4_cmplteu>;
def: T_RR_pat<A4_rcmpeq, int_hexagon_A4_rcmpeq>;
def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>;
def: T_RI_pat<A4_rcmpeqi, int_hexagon_A4_rcmpeqi>;
def: T_RI_pat<A4_rcmpneqi, int_hexagon_A4_rcmpneqi>;
//*******************************************************************
// CR
//*******************************************************************
// CR / Logical Operations On Predicates.
def: T_Q_QQQ_pat<C4_and_and, int_hexagon_C4_and_and>;
def: T_Q_QQQ_pat<C4_and_andn, int_hexagon_C4_and_andn>;
def: T_Q_QQQ_pat<C4_and_or, int_hexagon_C4_and_or>;
def: T_Q_QQQ_pat<C4_and_orn, int_hexagon_C4_and_orn>;
def: T_Q_QQQ_pat<C4_or_and, int_hexagon_C4_or_and>;
def: T_Q_QQQ_pat<C4_or_andn, int_hexagon_C4_or_andn>;
def: T_Q_QQQ_pat<C4_or_or, int_hexagon_C4_or_or>;
def: T_Q_QQQ_pat<C4_or_orn, int_hexagon_C4_or_orn>;
//*******************************************************************
// XTYPE/ALU
//*******************************************************************
// Add And Accumulate.
def : T_RRI_pat <S4_addaddi, int_hexagon_S4_addaddi>;
def : T_RIR_pat <S4_subaddi, int_hexagon_S4_subaddi>;
// XTYPE / ALU / Logical-logical Words.
def : T_RRR_pat <M4_or_xor, int_hexagon_M4_or_xor>;
def : T_RRR_pat <M4_and_xor, int_hexagon_M4_and_xor>;
def : T_RRR_pat <M4_or_and, int_hexagon_M4_or_and>;
def : T_RRR_pat <M4_and_and, int_hexagon_M4_and_and>;
def : T_RRR_pat <M4_xor_and, int_hexagon_M4_xor_and>;
def : T_RRR_pat <M4_or_or, int_hexagon_M4_or_or>;
def : T_RRR_pat <M4_and_or, int_hexagon_M4_and_or>;
def : T_RRR_pat <M4_xor_or, int_hexagon_M4_xor_or>;
def : T_RRR_pat <M4_or_andn, int_hexagon_M4_or_andn>;
def : T_RRR_pat <M4_and_andn, int_hexagon_M4_and_andn>;
def : T_RRR_pat <M4_xor_andn, int_hexagon_M4_xor_andn>;
def : T_RRI_pat <S4_or_andi, int_hexagon_S4_or_andi>;
def : T_RRI_pat <S4_or_andix, int_hexagon_S4_or_andix>;
def : T_RRI_pat <S4_or_ori, int_hexagon_S4_or_ori>;
// Modulo wrap.
def : T_RR_pat <A4_modwrapu, int_hexagon_A4_modwrapu>;
// Arithmetic/Convergent round
// Rd=[cround|round](Rs,Rt)[:sat]
// Rd=[cround|round](Rs,#u5)[:sat]
def : T_RI_pat <A4_cround_ri, int_hexagon_A4_cround_ri>;
def : T_RR_pat <A4_cround_rr, int_hexagon_A4_cround_rr>;
def : T_RI_pat <A4_round_ri, int_hexagon_A4_round_ri>;
def : T_RR_pat <A4_round_rr, int_hexagon_A4_round_rr>;
def : T_RI_pat <A4_round_ri_sat, int_hexagon_A4_round_ri_sat>;
def : T_RR_pat <A4_round_rr_sat, int_hexagon_A4_round_rr_sat>;
def : T_P_pat <A2_roundsat, int_hexagon_A2_roundsat>;
//Rdd[+]=vrmpybsu(Rss,Rtt)
//Rdd[+]=vrmpybuu(Rss,Rtt)
let Predicates = [HasV5] in {
def : T_PP_pat <M5_vrmpybsu, int_hexagon_M5_vrmpybsu>;
def : T_PP_pat <M5_vrmpybuu, int_hexagon_M5_vrmpybuu>;
@ -31,7 +336,6 @@ def : T_PRR_pat <M5_vmacbuu, int_hexagon_M5_vmacbuu>;
// Rd=vaddhub(Rss,Rtt):sat
def : T_PP_pat <A5_vaddhubs, int_hexagon_A5_vaddhubs>;
}
def : T_FF_pat<F2_sfadd, int_hexagon_F2_sfadd>;
def : T_FF_pat<F2_sfsub, int_hexagon_F2_sfsub>;

View File

@ -365,38 +365,34 @@ def ToI32: OutPatFrag<(ops node:$V), (A2_tfrsi $V)>;
// --(2) Type cast -------------------------------------------------------
//
let Predicates = [HasV5] in {
def: OpR_R_pat<F2_conv_sf2df, pf1<fpextend>, f64, F32>;
def: OpR_R_pat<F2_conv_df2sf, pf1<fpround>, f32, F64>;
def: OpR_R_pat<F2_conv_sf2df, pf1<fpextend>, f64, F32>;
def: OpR_R_pat<F2_conv_df2sf, pf1<fpround>, f32, F64>;
def: OpR_R_pat<F2_conv_w2sf, pf1<sint_to_fp>, f32, I32>;
def: OpR_R_pat<F2_conv_d2sf, pf1<sint_to_fp>, f32, I64>;
def: OpR_R_pat<F2_conv_w2df, pf1<sint_to_fp>, f64, I32>;
def: OpR_R_pat<F2_conv_d2df, pf1<sint_to_fp>, f64, I64>;
def: OpR_R_pat<F2_conv_w2sf, pf1<sint_to_fp>, f32, I32>;
def: OpR_R_pat<F2_conv_d2sf, pf1<sint_to_fp>, f32, I64>;
def: OpR_R_pat<F2_conv_w2df, pf1<sint_to_fp>, f64, I32>;
def: OpR_R_pat<F2_conv_d2df, pf1<sint_to_fp>, f64, I64>;
def: OpR_R_pat<F2_conv_uw2sf, pf1<uint_to_fp>, f32, I32>;
def: OpR_R_pat<F2_conv_ud2sf, pf1<uint_to_fp>, f32, I64>;
def: OpR_R_pat<F2_conv_uw2df, pf1<uint_to_fp>, f64, I32>;
def: OpR_R_pat<F2_conv_ud2df, pf1<uint_to_fp>, f64, I64>;
def: OpR_R_pat<F2_conv_uw2sf, pf1<uint_to_fp>, f32, I32>;
def: OpR_R_pat<F2_conv_ud2sf, pf1<uint_to_fp>, f32, I64>;
def: OpR_R_pat<F2_conv_uw2df, pf1<uint_to_fp>, f64, I32>;
def: OpR_R_pat<F2_conv_ud2df, pf1<uint_to_fp>, f64, I64>;
def: OpR_R_pat<F2_conv_sf2w_chop, pf1<fp_to_sint>, i32, F32>;
def: OpR_R_pat<F2_conv_df2w_chop, pf1<fp_to_sint>, i32, F64>;
def: OpR_R_pat<F2_conv_sf2d_chop, pf1<fp_to_sint>, i64, F32>;
def: OpR_R_pat<F2_conv_df2d_chop, pf1<fp_to_sint>, i64, F64>;
def: OpR_R_pat<F2_conv_sf2w_chop, pf1<fp_to_sint>, i32, F32>;
def: OpR_R_pat<F2_conv_df2w_chop, pf1<fp_to_sint>, i32, F64>;
def: OpR_R_pat<F2_conv_sf2d_chop, pf1<fp_to_sint>, i64, F32>;
def: OpR_R_pat<F2_conv_df2d_chop, pf1<fp_to_sint>, i64, F64>;
def: OpR_R_pat<F2_conv_sf2uw_chop, pf1<fp_to_uint>, i32, F32>;
def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>;
def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>;
def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>;
}
def: OpR_R_pat<F2_conv_sf2uw_chop, pf1<fp_to_uint>, i32, F32>;
def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>;
def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>;
def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>;
// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
let Predicates = [HasV5] in {
def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>;
def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>;
def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>;
def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>;
}
def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>;
def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>;
def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>;
def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>;
multiclass Cast_pat<ValueType Ta, ValueType Tb, RegisterClass RC> {
def: Pat<(Tb (bitconvert (Ta RC:$Rs))), (Tb RC:$Rs)>;
@ -599,31 +595,29 @@ def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, v2i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu, setugt, i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>;
let Predicates = [HasV5] in {
def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>;
def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>;
def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>;
def: OpR_RR_pat<F2_sfcmpeq, setoeq, i1, F32>;
def: OpR_RR_pat<F2_sfcmpgt, setogt, i1, F32>;
def: OpR_RR_pat<F2_sfcmpge, setoge, i1, F32>;
def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setolt>, i1, F32>;
def: OpR_RR_pat<F2_sfcmpge, RevCmp<setole>, i1, F32>;
def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setlt>, i1, F32>;
def: OpR_RR_pat<F2_sfcmpge, RevCmp<setle>, i1, F32>;
def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>;
def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>;
def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>;
def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>;
def: OpR_RR_pat<F2_sfcmpeq, setoeq, i1, F32>;
def: OpR_RR_pat<F2_sfcmpgt, setogt, i1, F32>;
def: OpR_RR_pat<F2_sfcmpge, setoge, i1, F32>;
def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setolt>, i1, F32>;
def: OpR_RR_pat<F2_sfcmpge, RevCmp<setole>, i1, F32>;
def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setlt>, i1, F32>;
def: OpR_RR_pat<F2_sfcmpge, RevCmp<setle>, i1, F32>;
def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>;
def: OpR_RR_pat<F2_dfcmpeq, seteq, i1, F64>;
def: OpR_RR_pat<F2_dfcmpgt, setgt, i1, F64>;
def: OpR_RR_pat<F2_dfcmpge, setge, i1, F64>;
def: OpR_RR_pat<F2_dfcmpeq, setoeq, i1, F64>;
def: OpR_RR_pat<F2_dfcmpgt, setogt, i1, F64>;
def: OpR_RR_pat<F2_dfcmpge, setoge, i1, F64>;
def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setolt>, i1, F64>;
def: OpR_RR_pat<F2_dfcmpge, RevCmp<setole>, i1, F64>;
def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setlt>, i1, F64>;
def: OpR_RR_pat<F2_dfcmpge, RevCmp<setle>, i1, F64>;
def: OpR_RR_pat<F2_dfcmpuo, setuo, i1, F64>;
}
def: OpR_RR_pat<F2_dfcmpeq, seteq, i1, F64>;
def: OpR_RR_pat<F2_dfcmpgt, setgt, i1, F64>;
def: OpR_RR_pat<F2_dfcmpge, setge, i1, F64>;
def: OpR_RR_pat<F2_dfcmpeq, setoeq, i1, F64>;
def: OpR_RR_pat<F2_dfcmpgt, setogt, i1, F64>;
def: OpR_RR_pat<F2_dfcmpge, setoge, i1, F64>;
def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setolt>, i1, F64>;
def: OpR_RR_pat<F2_dfcmpge, RevCmp<setole>, i1, F64>;
def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setlt>, i1, F64>;
def: OpR_RR_pat<F2_dfcmpge, RevCmp<setle>, i1, F64>;
def: OpR_RR_pat<F2_dfcmpuo, setuo, i1, F64>;
// Avoid C4_cmpneqi, C4_cmpltei, C4_cmplteui, since they cannot form compounds.
@ -746,32 +740,28 @@ class Cmpud<InstHexagon MI>: T3<C2_or, F2_dfcmpuo, MI>;
class Cmpufn<InstHexagon MI>: T3<C2_orn, F2_sfcmpuo, MI>;
class Cmpudn<InstHexagon MI>: T3<C2_orn, F2_dfcmpuo, MI>;
let Predicates = [HasV5] in {
def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>, setueq, i1, F32>;
def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, setuge, i1, F32>;
def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, setugt, i1, F32>;
def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, RevCmp<setule>, i1, F32>;
def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, RevCmp<setult>, i1, F32>;
def: OpmR_RR_pat<Cmpufn<F2_sfcmpeq>, setune, i1, F32>;
def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>, setueq, i1, F32>;
def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, setuge, i1, F32>;
def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, setugt, i1, F32>;
def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, RevCmp<setule>, i1, F32>;
def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, RevCmp<setult>, i1, F32>;
def: OpmR_RR_pat<Cmpufn<F2_sfcmpeq>, setune, i1, F32>;
def: OpmR_RR_pat<Cmpud<F2_dfcmpeq>, setueq, i1, F64>;
def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, setuge, i1, F64>;
def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, setugt, i1, F64>;
def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, RevCmp<setule>, i1, F64>;
def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, RevCmp<setult>, i1, F64>;
def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>;
}
def: OpmR_RR_pat<Cmpud<F2_dfcmpeq>, setueq, i1, F64>;
def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, setuge, i1, F64>;
def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, setugt, i1, F64>;
def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, RevCmp<setule>, i1, F64>;
def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, RevCmp<setult>, i1, F64>;
def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>;
let Predicates = [HasV5] in {
def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>;
def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>;
def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>;
def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>;
def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto, i1, F32>;
def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto, i1, F64>;
}
def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto, i1, F32>;
def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto, i1, F64>;
// --(6) Select ----------------------------------------------------------
@ -801,27 +791,25 @@ def: Pat<(select I1:$Pu, I64:$Rs, I64:$Rt),
(Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
(C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
let Predicates = [HasV5] in {
def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I),
(C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt),
(C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt),
(C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>;
def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt),
(Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
(C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I),
(C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt),
(C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt),
(C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>;
def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt),
(Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
(C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt),
(C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>;
def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt),
(C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>;
def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt),
(C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>;
def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt),
(C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>;
def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs),
(C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I),
(C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
}
def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs),
(C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I),
(C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
def: Pat<(select I1:$Pu, V4I8:$Rs, V4I8:$Rt),
(LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>;
@ -889,7 +877,7 @@ let AddedComplexity = 200 in {
defm: SelMinMax_pats<setult, I64, A2_minup, A2_maxup>;
}
let AddedComplexity = 100, Predicates = [HasV5] in {
let AddedComplexity = 100 in {
defm: SelMinMax_pats<setolt, F32, F2_sfmin, F2_sfmax>;
defm: SelMinMax_pats<setole, F32, F2_sfmin, F2_sfmax>;
defm: SelMinMax_pats<setogt, F32, F2_sfmax, F2_sfmin>;
@ -1014,7 +1002,7 @@ let Predicates = [HasV60] in {
def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)),
(S2_asr_i_r_rnd I32:$Rs, imm:$u5)>;
def: Pat<(sra (add (sra I64:$Rs, u6_0ImmPred:$u6), 1), (i32 1)),
(S2_asr_i_p_rnd I64:$Rs, imm:$u6)>, Requires<[HasV5]>;
(S2_asr_i_p_rnd I64:$Rs, imm:$u6)>;
// Prefer S2_addasl_rrri over S2_asl_i_r_acc.
let AddedComplexity = 120 in
@ -1191,17 +1179,15 @@ def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>;
def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>;
def: Pat<(ineg I64:$Rs), (A2_negp I64:$Rs)>;
let Predicates = [HasV5] in {
def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>;
def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>;
def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>;
def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>;
def: Pat<(fabs F64:$Rs),
(Combinew (S2_clrbit_i (HiReg $Rs), 31),
(i32 (LoReg $Rs)))>;
def: Pat<(fneg F64:$Rs),
(Combinew (S2_togglebit_i (HiReg $Rs), 31),
(i32 (LoReg $Rs)))>;
}
def: Pat<(fabs F64:$Rs),
(Combinew (S2_clrbit_i (HiReg $Rs), 31),
(i32 (LoReg $Rs)))>;
def: Pat<(fneg F64:$Rs),
(Combinew (S2_togglebit_i (HiReg $Rs), 31),
(i32 (LoReg $Rs)))>;
def: Pat<(add I32:$Rs, anyimm:$s16), (A2_addi I32:$Rs, imm:$s16)>;
def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>;
@ -1267,13 +1253,11 @@ def: OpR_RR_pat<C2_and, Mul, v2i1, V2I1>;
def: OpR_RR_pat<C2_and, Mul, v4i1, V4I1>;
def: OpR_RR_pat<C2_and, Mul, v8i1, V8I1>;
let Predicates = [HasV5] in {
def: OpR_RR_pat<F2_sfadd, pf2<fadd>, f32, F32>;
def: OpR_RR_pat<F2_sfsub, pf2<fsub>, f32, F32>;
def: OpR_RR_pat<F2_sfmpy, pf2<fmul>, f32, F32>;
def: OpR_RR_pat<F2_sfmin, pf2<fminnum>, f32, F32>;
def: OpR_RR_pat<F2_sfmax, pf2<fmaxnum>, f32, F32>;
}
def: OpR_RR_pat<F2_sfadd, pf2<fadd>, f32, F32>;
def: OpR_RR_pat<F2_sfsub, pf2<fsub>, f32, F32>;
def: OpR_RR_pat<F2_sfmpy, pf2<fmul>, f32, F32>;
def: OpR_RR_pat<F2_sfmin, pf2<fminnum>, f32, F32>;
def: OpR_RR_pat<F2_sfmax, pf2<fmaxnum>, f32, F32>;
// In expressions like a0*b0 + a1*b1 + ..., prefer to generate multiply-add,
// over add-add with individual multiplies as inputs.
@ -1506,14 +1490,12 @@ def: Pat<(add I32:$Ru, (Su<Mul> I32:$Ry, I32:$Rs)),
(M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>;
let Predicates = [HasV5] in {
def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx),
(F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
(F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx),
(F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
}
def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx),
(F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
(F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx),
(F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
def: Pat<(mul V2I32:$Rs, V2I32:$Rt),
@ -1540,14 +1522,12 @@ def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
// Multiplies two v4i8 vectors.
def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
(S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>,
Requires<[HasV5]>;
(S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>;
// Multiplies two v8i8 vectors.
def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
(Combinew (S2_vtrunehb (M5_vmpybuu (HiReg $Rs), (HiReg $Rt))),
(S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>,
Requires<[HasV5]>;
(S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>;
// --(10) Bit ------------------------------------------------------------

View File

@ -118,18 +118,7 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
bool HasEHReturn = MF->getInfo<HexagonMachineFunctionInfo>()->hasEHReturn();
switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) {
case Hexagon::ArchEnum::V4:
case Hexagon::ArchEnum::V5:
case Hexagon::ArchEnum::V55:
case Hexagon::ArchEnum::V60:
case Hexagon::ArchEnum::V62:
case Hexagon::ArchEnum::V65:
return HasEHReturn ? CalleeSavedRegsV3EHReturn : CalleeSavedRegsV3;
}
llvm_unreachable("Callee saved registers requested for unknown architecture "
"version");
return HasEHReturn ? CalleeSavedRegsV3EHReturn : CalleeSavedRegsV3;
}

View File

@ -57,10 +57,10 @@ include "HexagonDepIICScalar.td"
include "HexagonDepIICHVX.td"
//===----------------------------------------------------------------------===//
// V4 Machine Info +
// V5 Machine Info +
//===----------------------------------------------------------------------===//
include "HexagonScheduleV4.td"
include "HexagonScheduleV5.td"
// V55 Machine Info +
include "HexagonScheduleV55.td"

View File

@ -1,4 +1,4 @@
//=-HexagonScheduleV4.td - HexagonV4 Scheduling Definitions --*- tablegen -*-=//
//=-HexagonScheduleV5.td - HexagonV5 Scheduling Definitions --*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
@ -10,8 +10,8 @@
def LD_tc_ld_SLOT01 : InstrItinClass;
def ST_tc_st_SLOT01 : InstrItinClass;
class HexagonV4PseudoItin {
list<InstrItinData> V4PseudoItin_list = [
class HexagonV5PseudoItin {
list<InstrItinData> V5PseudoItin_list = [
InstrItinData<PSEUDO, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [SLOT2, SLOT3]>]>,
@ -20,27 +20,27 @@ class HexagonV4PseudoItin {
];
}
def HexagonV4ItinList : DepScalarItinV4, HexagonV4PseudoItin {
list<InstrItinData> V4Itin_list = [
def HexagonV5ItinList : DepScalarItinV5, HexagonV5PseudoItin {
list<InstrItinData> V5Itin_list = [
InstrItinData<LD_tc_ld_SLOT01, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData<ST_tc_st_SLOT01, [InstrStage<1, [SLOT0, SLOT1]>]>
];
list<InstrItinData> ItinList =
!listconcat(V4Itin_list, DepScalarItinV4_list, V4PseudoItin_list);
!listconcat(V5Itin_list, DepScalarItinV5_list, V5PseudoItin_list);
}
def HexagonItinerariesV4 :
def HexagonItinerariesV5 :
ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP],
[Hex_FWD], HexagonV4ItinList.ItinList>;
[Hex_FWD], HexagonV5ItinList.ItinList>;
def HexagonModelV4 : SchedMachineModel {
def HexagonModelV5 : SchedMachineModel {
// Max issue per cycle == bundle width.
let IssueWidth = 4;
let Itineraries = HexagonItinerariesV4;
let Itineraries = HexagonItinerariesV5;
let LoadLatency = 1;
let CompleteModel = 0;
}
//===----------------------------------------------------------------------===//
// Hexagon V4 Resource Definitions -
// Hexagon V5 Resource Definitions -
//===----------------------------------------------------------------------===//

View File

@ -93,7 +93,6 @@ HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
static std::map<StringRef, Hexagon::ArchEnum> CpuTable{
{"generic", Hexagon::ArchEnum::V60},
{"hexagonv4", Hexagon::ArchEnum::V4},
{"hexagonv5", Hexagon::ArchEnum::V5},
{"hexagonv55", Hexagon::ArchEnum::V55},
{"hexagonv60", Hexagon::ArchEnum::V60},

View File

@ -59,7 +59,7 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
public:
Hexagon::ArchEnum HexagonArchVersion;
Hexagon::ArchEnum HexagonHVXVersion = Hexagon::ArchEnum::V4;
Hexagon::ArchEnum HexagonHVXVersion = Hexagon::ArchEnum::NoArch;
CodeGenOpt::Level OptLevel;
/// True if the target should use Back-Skip-Back scheduling. This is the
/// default for V60.
@ -158,7 +158,9 @@ public:
bool useNewValueStores() const { return UseNewValueStores; }
bool useSmallData() const { return UseSmallData; }
bool useHVXOps() const { return HexagonHVXVersion > Hexagon::ArchEnum::V4; }
bool useHVXOps() const {
return HexagonHVXVersion > Hexagon::ArchEnum::NoArch;
}
bool useHVX128BOps() const { return useHVXOps() && UseHVX128BOps; }
bool useHVX64BOps() const { return useHVXOps() && UseHVX64BOps; }

View File

@ -768,7 +768,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI,
// Make sure that for non-POST_INC stores:
// 1. The only use of reg is DepReg and no other registers.
// This handles V4 base+index registers.
// This handles base+index registers.
// The following store can not be dot new.
// Eg. r0 = add(r0, #3)
// memw(r1+r0<<#2) = r0
@ -838,11 +838,7 @@ static bool isImplicitDependency(const MachineInstr &I, bool CheckDef,
return false;
}
// Check to see if an instruction can be dot new
// There are three kinds.
// 1. dot new on predicate - V2/V3/V4
// 2. dot new on stores NV/ST - V4
// 3. dot new on jump NV/J - V4 -- This is generated in a pass.
// Check to see if an instruction can be dot new.
bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI,
const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
const TargetRegisterClass* RC) {
@ -1075,9 +1071,6 @@ bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) {
if (MI.isInlineAsm() && !ScheduleInlineAsm)
return true;
// From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
// trap, pause, barrier, icinva, isync, and syncht are solo instructions.
// They must not be grouped with other instructions in a packet.
if (isSchedBarrier(MI))
return true;
@ -1289,8 +1282,8 @@ bool HexagonPacketizerList::hasRegMaskDependence(const MachineInstr &I,
return false;
}
bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr &I,
const MachineInstr &J) {
bool HexagonPacketizerList::hasDualStoreDependence(const MachineInstr &I,
const MachineInstr &J) {
bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J);
bool StoreI = I.mayStore(), StoreJ = J.mayStore();
if ((SysI && StoreJ) || (SysJ && StoreI))
@ -1343,10 +1336,10 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
if (Dependence)
return false;
// V4 allows dual stores. It does not allow second store, if the first
// store is not in SLOT0. New value store, new value jump, dealloc_return
// and memop always take SLOT0. Arch spec 3.4.4.2.
Dependence = hasV4SpecificDependence(I, J);
// Dual-store does not allow second store, if the first store is not
// in SLOT0. New value store, new value jump, dealloc_return and memop
// always take SLOT0. Arch spec 3.4.4.2.
Dependence = hasDualStoreDependence(I, J);
if (Dependence)
return false;
@ -1505,10 +1498,10 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
}
// For Order dependences:
// 1. On V4 or later, volatile loads/stores can be packetized together,
// unless other rules prevent it.
// 1. Volatile loads/stores can be packetized together, unless other
// rules prevent it.
// 2. Store followed by a load is not allowed.
// 3. Store followed by a store is only valid on V4 or later.
// 3. Store followed by a store is valid.
// 4. Load followed by any memory operation is allowed.
if (DepType == SDep::Order) {
if (!PacketizeVolatiles) {
@ -1555,7 +1548,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
continue;
}
// For V4, special case ALLOCFRAME. Even though there is dependency
// Special case for ALLOCFRAME: even though there is dependency
// between ALLOCFRAME and subsequent store, allow it to be packetized
// in the same packet. This implies that the store is using the caller's
// SP. Hence, offset needs to be updated accordingly.

View File

@ -149,7 +149,7 @@ protected:
bool hasDeadDependence(const MachineInstr &I, const MachineInstr &J);
bool hasControlDependence(const MachineInstr &I, const MachineInstr &J);
bool hasRegMaskDependence(const MachineInstr &I, const MachineInstr &J);
bool hasV4SpecificDependence(const MachineInstr &I, const MachineInstr &J);
bool hasDualStoreDependence(const MachineInstr &I, const MachineInstr &J);
bool producesStall(const MachineInstr &MI);
};

View File

@ -634,8 +634,7 @@ bool HexagonMCInstrInfo::isOrderedDuplexPair(MCInstrInfo const &MCII,
return false;
}
if (STI.getCPU().equals_lower("hexagonv4") ||
STI.getCPU().equals_lower("hexagonv5") ||
if (STI.getCPU().equals_lower("hexagonv5") ||
STI.getCPU().equals_lower("hexagonv55") ||
STI.getCPU().equals_lower("hexagonv60")) {
// If a store appears, it must be in slot 0 (MIa) 1st, and then slot 1 (MIb);

View File

@ -61,8 +61,6 @@ cl::opt<bool> llvm::HexagonDisableDuplex
cl::desc("Disable looking for duplex instructions for Hexagon"));
namespace { // These flags are to be deprecated
cl::opt<bool> MV4("mv4", cl::Hidden, cl::desc("Build for Hexagon V4"),
cl::init(false));
cl::opt<bool> MV5("mv5", cl::Hidden, cl::desc("Build for Hexagon V5"),
cl::init(false));
cl::opt<bool> MV55("mv55", cl::Hidden, cl::desc("Build for Hexagon V55"),
@ -83,18 +81,18 @@ cl::opt<Hexagon::ArchEnum>
clEnumValN(Hexagon::ArchEnum::V62, "v62", "Build for HVX v62"),
clEnumValN(Hexagon::ArchEnum::V65, "v65", "Build for HVX v65"),
// Sentinel for no value specified
clEnumValN(Hexagon::ArchEnum::V5, "", "")),
clEnumValN(Hexagon::ArchEnum::Generic, "", "")),
// Sentinel for flag not present
cl::init(Hexagon::ArchEnum::V4), cl::ValueOptional);
cl::init(Hexagon::ArchEnum::NoArch), cl::ValueOptional);
static cl::opt<bool>
DisableHVX("mno-hvx", cl::Hidden, cl::desc("Disable Hexagon Vector eXtensions"));
DisableHVX("mno-hvx", cl::Hidden,
cl::desc("Disable Hexagon Vector eXtensions"));
static StringRef DefaultArch = "hexagonv60";
static StringRef HexagonGetArchVariant() {
if (MV4)
return "hexagonv4";
if (MV5)
return "hexagonv5";
if (MV55)
@ -123,7 +121,7 @@ StringRef Hexagon_MC::selectHexagonCPU(StringRef CPU) {
return ArchV;
}
unsigned llvm::HexagonGetLastSlot() { return HexagonItinerariesV4FU::SLOT3; }
unsigned llvm::HexagonGetLastSlot() { return HexagonItinerariesV5FU::SLOT3; }
namespace {
@ -279,6 +277,7 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
Result.push_back(FS);
switch (EnableHVX) {
case Hexagon::ArchEnum::V5:
case Hexagon::ArchEnum::V55:
break;
case Hexagon::ArchEnum::V60:
@ -290,14 +289,14 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
case Hexagon::ArchEnum::V65:
Result.push_back("+hvxv65");
break;
case Hexagon::ArchEnum::V5:{
case Hexagon::ArchEnum::Generic:{
Result.push_back(StringSwitch<StringRef>(CPU)
.Case("hexagonv60", "+hvxv60")
.Case("hexagonv62", "+hvxv62")
.Case("hexagonv65", "+hvxv65"));
break;
}
case Hexagon::ArchEnum::V4:
case Hexagon::ArchEnum::NoArch:
// Sentinel if -mhvx isn't specified
break;
}
@ -307,15 +306,9 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
static bool isCPUValid(std::string CPU)
{
std::vector<std::string> table
{
"generic",
"hexagonv4",
"hexagonv5",
"hexagonv55",
"hexagonv60",
"hexagonv62",
"hexagonv65",
std::vector<std::string> table {
"generic", "hexagonv5", "hexagonv55", "hexagonv60",
"hexagonv62", "hexagonv65",
};
return std::find(table.begin(), table.end(), CPU) != table.end();
@ -336,8 +329,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
// Make sure that +hvx-length turns hvx on, and that "hvx" alone
// turns on hvxvNN, corresponding to the existing ArchVNN.
FeatureBitset FB = S;
unsigned CpuArch = ArchV4;
for (unsigned F : {ArchV65, ArchV62, ArchV60, ArchV55, ArchV5, ArchV4}) {
unsigned CpuArch = ArchV5;
for (unsigned F : {ArchV65, ArchV62, ArchV60, ArchV55, ArchV5}) {
if (!FB.test(F))
continue;
CpuArch = F;
@ -402,7 +395,6 @@ MCSubtargetInfo *Hexagon_MC::createHexagonMCSubtargetInfo(const Triple &TT,
unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) {
static std::map<StringRef,unsigned> ElfFlags = {
{"hexagonv4", ELF::EF_HEXAGON_MACH_V4},
{"hexagonv5", ELF::EF_HEXAGON_MACH_V5},
{"hexagonv55", ELF::EF_HEXAGON_MACH_V55},
{"hexagonv60", ELF::EF_HEXAGON_MACH_V60},

View File

@ -32,8 +32,8 @@ declare i32 @bar(i32, i32) #1
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv4" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv4" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #0 = { nounwind "target-cpu"="hexagonv5" }
attributes #1 = { "target-cpu"="hexagonv5" }
attributes #2 = { nounwind readnone }
attributes #3 = { nounwind }

View File

@ -1,22 +1,24 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: __hexagon_adddf3
; CHECK: __hexagon_subdf3
define void @foo(double* %acc, double %num, double %num2) nounwind {
entry:
%acc.addr = alloca double*, align 4
%num.addr = alloca double, align 8
%num2.addr = alloca double, align 8
store double* %acc, double** %acc.addr, align 4
store double %num, double* %num.addr, align 8
store double %num2, double* %num2.addr, align 8
%0 = load double*, double** %acc.addr, align 4
%1 = load double, double* %0
%2 = load double, double* %num.addr, align 8
%add = fadd double %1, %2
%3 = load double, double* %num2.addr, align 8
%sub = fsub double %add, %3
%4 = load double*, double** %acc.addr, align 4
store double %sub, double* %4
define void @f0(double* %a0, double %a1, double %a2) #0 {
b0:
%v0 = alloca double*, align 4
%v1 = alloca double, align 8
%v2 = alloca double, align 8
store double* %a0, double** %v0, align 4
store double %a1, double* %v1, align 8
store double %a2, double* %v2, align 8
%v3 = load double*, double** %v0, align 4
%v4 = load double, double* %v3
%v5 = load double, double* %v1, align 8
%v6 = fadd double %v4, %v5
%v7 = load double, double* %v2, align 8
%v8 = fsub double %v6, %v7
%v9 = load double*, double** %v0, align 4
store double %v8, double* %v9
ret void
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,22 +1,24 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; CHECK: __hexagon_addsf3
; CHECK: __hexagon_subsf3
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: sfadd
; CHECK: sfsub
define void @foo(float* %acc, float %num, float %num2) nounwind {
entry:
%acc.addr = alloca float*, align 4
%num.addr = alloca float, align 4
%num2.addr = alloca float, align 4
store float* %acc, float** %acc.addr, align 4
store float %num, float* %num.addr, align 4
store float %num2, float* %num2.addr, align 4
%0 = load float*, float** %acc.addr, align 4
%1 = load float, float* %0
%2 = load float, float* %num.addr, align 4
%add = fadd float %1, %2
%3 = load float, float* %num2.addr, align 4
%sub = fsub float %add, %3
%4 = load float*, float** %acc.addr, align 4
store float %sub, float* %4
define void @f0(float* %a0, float %a1, float %a2) #0 {
b0:
%v0 = alloca float*, align 4
%v1 = alloca float, align 4
%v2 = alloca float, align 4
store float* %a0, float** %v0, align 4
store float %a1, float* %v1, align 4
store float %a2, float* %v2, align 4
%v3 = load float*, float** %v0, align 4
%v4 = load float, float* %v3
%v5 = load float, float* %v1, align 4
%v6 = fadd float %v4, %v5
%v7 = load float, float* %v2, align 4
%v8 = fsub float %v6, %v7
%v9 = load float*, float** %v0, align 4
store float %v8, float* %v9
ret void
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,22 +1,24 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; CHECK: __hexagon_addsf3
; CHECK: __hexagon_subsf3
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: sfadd
; CHECK: sfsub
define void @foo(float* %acc, float %num, float %num2) nounwind {
entry:
%acc.addr = alloca float*, align 4
%num.addr = alloca float, align 4
%num2.addr = alloca float, align 4
store float* %acc, float** %acc.addr, align 4
store float %num, float* %num.addr, align 4
store float %num2, float* %num2.addr, align 4
%0 = load float*, float** %acc.addr, align 4
%1 = load float, float* %0
%2 = load float, float* %num.addr, align 4
%add = fadd float %1, %2
%3 = load float, float* %num2.addr, align 4
%sub = fsub float %add, %3
%4 = load float*, float** %acc.addr, align 4
store float %sub, float* %4
define void @f0(float* %a0, float %a1, float %a2) #0 {
b0:
%v0 = alloca float*, align 4
%v1 = alloca float, align 4
%v2 = alloca float, align 4
store float* %a0, float** %v0, align 4
store float %a1, float* %v1, align 4
store float %a2, float* %v2, align 4
%v3 = load float*, float** %v0, align 4
%v4 = load float, float* %v3
%v5 = load float, float* %v1, align 4
%v6 = fadd float %v4, %v5
%v7 = load float, float* %v2, align 4
%v8 = fsub float %v6, %v7
%v9 = load float*, float** %v0, align 4
store float %v8, float* %v9
ret void
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,51 +1,57 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; RUN: llc -march=hexagon < %s | FileCheck %s
; Check that we generate load instructions with global + offset
%struct.struc = type { i8, i8, i16, i32 }
@foo = common global %struct.struc zeroinitializer, align 4
%s.0 = type { i8, i8, i16, i32 }
define void @loadWord(i32 %val1, i32 %val2, i32* nocapture %ival) nounwind {
; CHECK: r{{[0-9]+}} = memw(##foo+4)
entry:
%cmp = icmp sgt i32 %val1, %val2
br i1 %cmp, label %if.then, label %if.end
@g0 = common global %s.0 zeroinitializer, align 4
if.then: ; preds = %entry
%0 = load i32, i32* getelementptr inbounds (%struct.struc, %struct.struc* @foo, i32 0, i32 3), align 4
store i32 %0, i32* %ival, align 4
br label %if.end
; CHECK-LABEL: f0:
; CHECK: r{{[0-9]+}} = memw(##g0+4)
define void @f0(i32 %a0, i32 %a1, i32* nocapture %a2) #0 {
b0:
%v0 = icmp sgt i32 %a0, %a1
br i1 %v0, label %b1, label %b2
if.end: ; preds = %if.then, %entry
b1: ; preds = %b0
%v1 = load i32, i32* getelementptr inbounds (%s.0, %s.0* @g0, i32 0, i32 3), align 4
store i32 %v1, i32* %a2, align 4
br label %b2
b2: ; preds = %b1, %b0
ret void
}
define void @loadByte(i32 %val1, i32 %val2, i8* nocapture %ival) nounwind {
; CHECK: r{{[0-9]+}} = memub(##foo+1)
entry:
%cmp = icmp sgt i32 %val1, %val2
br i1 %cmp, label %if.then, label %if.end
; CHECK-LABEL: f1:
; CHECK: r{{[0-9]+}} = memub(##g0+1)
define void @f1(i32 %a0, i32 %a1, i8* nocapture %a2) #0 {
b0:
%v0 = icmp sgt i32 %a0, %a1
br i1 %v0, label %b1, label %b2
if.then: ; preds = %entry
%0 = load i8, i8* getelementptr inbounds (%struct.struc, %struct.struc* @foo, i32 0, i32 1), align 1
store i8 %0, i8* %ival, align 1
br label %if.end
b1: ; preds = %b0
%v1 = load i8, i8* getelementptr inbounds (%s.0, %s.0* @g0, i32 0, i32 1), align 1
store i8 %v1, i8* %a2, align 1
br label %b2
if.end: ; preds = %if.then, %entry
b2: ; preds = %b1, %b0
ret void
}
define void @loadHWord(i32 %val1, i32 %val2, i16* %ival) nounwind {
; CHECK: r{{[0-9]+}} = memuh(##foo+2)
entry:
%cmp = icmp sgt i32 %val1, %val2
br i1 %cmp, label %if.then, label %if.end
; CHECK-LABEL: f2:
; CHECK: r{{[0-9]+}} = memuh(##g0+2)
define void @f2(i32 %a0, i32 %a1, i16* %a2) #0 {
b0:
%v0 = icmp sgt i32 %a0, %a1
br i1 %v0, label %b1, label %b2
if.then: ; preds = %entry
%0 = load i16, i16* getelementptr inbounds (%struct.struc, %struct.struc* @foo, i32 0, i32 2), align 2
store i16 %0, i16* %ival, align 2
br label %if.end
b1: ; preds = %b0
%v1 = load i16, i16* getelementptr inbounds (%s.0, %s.0* @g0, i32 0, i32 2), align 2
store i16 %v1, i16* %a2, align 2
br label %b2
if.end: ; preds = %if.then, %entry
b2: ; preds = %b1, %b0
ret void
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,35 +1,38 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; RUN: llc -march=hexagon < %s | FileCheck %s
; Check that we generate store instructions with global + offset
%struct.struc = type { i8, i8, i16, i32 }
%s.0 = type { i8, i8, i16, i32 }
@foo = common global %struct.struc zeroinitializer, align 4
@g0 = common global %s.0 zeroinitializer, align 4
define void @storeByte(i32 %val1, i32 %val2, i8 zeroext %ival) nounwind {
; CHECK: memb(##foo+1) = r{{[0-9]+}}
entry:
%cmp = icmp sgt i32 %val1, %val2
br i1 %cmp, label %if.then, label %if.end
; CHECK-LABEL: f0:
; CHECK: memb(##g0+1) = r{{[0-9]+}}
define void @f0(i32 %a0, i32 %a1, i8 zeroext %a2) #0 {
b0:
%v0 = icmp sgt i32 %a0, %a1
br i1 %v0, label %b1, label %b2
if.then: ; preds = %entry
store i8 %ival, i8* getelementptr inbounds (%struct.struc, %struct.struc* @foo, i32 0, i32 1), align 1
br label %if.end
b1: ; preds = %b0
store i8 %a2, i8* getelementptr inbounds (%s.0, %s.0* @g0, i32 0, i32 1), align 1
br label %b2
if.end: ; preds = %if.then, %entry
b2: ; preds = %b1, %b0
ret void
}
define void @storeHW(i32 %val1, i32 %val2, i16 signext %ival) nounwind {
; CHECK: memh(##foo+2) = r{{[0-9]+}}
entry:
%cmp = icmp sgt i32 %val1, %val2
br i1 %cmp, label %if.then, label %if.end
; CHECK-LABEL: f1:
; CHECK: memh(##g0+2) = r{{[0-9]+}}
define void @f1(i32 %a0, i32 %a1, i16 signext %a2) #0 {
b0:
%v0 = icmp sgt i32 %a0, %a1
br i1 %v0, label %b1, label %b2
if.then: ; preds = %entry
store i16 %ival, i16* getelementptr inbounds (%struct.struc, %struct.struc* @foo, i32 0, i32 2), align 2
br label %if.end
b1: ; preds = %b0
store i16 %a2, i16* getelementptr inbounds (%s.0, %s.0* @g0, i32 0, i32 2), align 2
br label %b2
if.end: ; preds = %if.then, %entry
b2: ; preds = %b1, %b0
ret void
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,33 +1,36 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; RUN: llc -march=hexagon < %s | FileCheck %s
; Check that gp-relative instructions are being generated.
@a = common global i32 0, align 4
@b = common global i32 0, align 4
@c = common global i32 0, align 4
; CHECK: r{{[0-9]+}} = memw(gp+#g0)
; CHECK: r{{[0-9]+}} = memw(gp+#g1)
; CHECK: if (p{{[0-3]}}) memw(##g2) = r{{[0-9]+}}
define i32 @foo(i32 %p) #0 {
entry:
; CHECK: r{{[0-9]+}} = memw(gp+#a)
; CHECK: r{{[0-9]+}} = memw(gp+#b)
; CHECK: if (p{{[0-3]}}) memw(##c) = r{{[0-9]+}}
%0 = load i32, i32* @a, align 4
%1 = load i32, i32* @b, align 4
%add = add nsw i32 %1, %0
%cmp = icmp eq i32 %0, %1
br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
@g0 = common global i32 0, align 4
@g1 = common global i32 0, align 4
@g2 = common global i32 0, align 4
entry.if.end_crit_edge:
%.pre = load i32, i32* @c, align 4
br label %if.end
define i32 @f0(i32 %a0) #0 {
b0:
%v0 = load i32, i32* @g0, align 4
%v1 = load i32, i32* @g1, align 4
%v2 = add nsw i32 %v1, %v0
%v3 = icmp eq i32 %v0, %v1
br i1 %v3, label %b2, label %b1
if.then:
%add1 = add nsw i32 %add, %0
store i32 %add1, i32* @c, align 4
br label %if.end
b1: ; preds = %b0
%v4 = load i32, i32* @g2, align 4
br label %b3
if.end:
%2 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ %add1, %if.then ]
%cmp2 = icmp eq i32 %add, %2
%sel1 = select i1 %cmp2, i32 %2, i32 %1
ret i32 %sel1
b2: ; preds = %b0
%v5 = add nsw i32 %v2, %v0
store i32 %v5, i32* @g2, align 4
br label %b3
b3: ; preds = %b2, %b1
%v6 = phi i32 [ %v4, %b1 ], [ %v5, %b2 ]
%v7 = icmp eq i32 %v2, %v6
%v8 = select i1 %v7, i32 %v6, i32 %v1
ret i32 %v8
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,87 +1,91 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 -no-phi-elim-live-out-early-exit \
; RUN: < %s | FileCheck %s
; RUN: llc -march=hexagon -no-phi-elim-live-out-early-exit < %s | FileCheck %s
; Check that we remove the compare and induction variable instructions
; after generating hardware loops.
; Bug 6685.
; CHECK-LABEL: f0:
; CHECK: loop0
; CHECK-NOT: r{{[0-9]+}} = add(r{{[0-9]+}},#-1)
; CHECK-NOT: cmp.eq
; CHECK: endloop0
define i32 @test1(i32* nocapture %b, i32 %n) nounwind readonly {
entry:
%cmp1 = icmp sgt i32 %n, 0
br i1 %cmp1, label %for.body.preheader, label %for.end
define i32 @f0(i32* nocapture %a0, i32 %a1) #0 {
b0:
%v0 = icmp sgt i32 %a1, 0
br i1 %v0, label %b1, label %b4
for.body.preheader:
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.preheader, %for.body
%sum.03 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
%arrayidx.phi = phi i32* [ %arrayidx.inc, %for.body ], [ %b, %for.body.preheader ]
%i.02 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%0 = load i32, i32* %arrayidx.phi, align 4
%add = add nsw i32 %0, %sum.03
%inc = add nsw i32 %i.02, 1
%exitcond = icmp eq i32 %inc, %n
%arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
br i1 %exitcond, label %for.end.loopexit, label %for.body
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %v5, %b2 ], [ 0, %b1 ]
%v2 = phi i32* [ %v8, %b2 ], [ %a0, %b1 ]
%v3 = phi i32 [ %v6, %b2 ], [ 0, %b1 ]
%v4 = load i32, i32* %v2, align 4
%v5 = add nsw i32 %v4, %v1
%v6 = add nsw i32 %v3, 1
%v7 = icmp eq i32 %v6, %a1
%v8 = getelementptr i32, i32* %v2, i32 1
br i1 %v7, label %b3, label %b2
for.end.loopexit:
br label %for.end
b3: ; preds = %b2
br label %b4
for.end:
%sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.end.loopexit ]
ret i32 %sum.0.lcssa
b4: ; preds = %b3, %b0
%v9 = phi i32 [ 0, %b0 ], [ %v5, %b3 ]
ret i32 %v9
}
; This test checks that the initial loop count value is removed.
; CHECK-LABEL: f1:
; CHECK-NOT: ={{.}}#40
; CHECK: loop0
; CHECK-NOT: r{{[0-9]+}} = add(r{{[0-9]+}},#-1)
; CHECK-NOT: cmp.eq
; CHECK: endloop0
define i32 @test2(i32* nocapture %b) nounwind readonly {
entry:
br label %for.body
define i32 @f1(i32* nocapture %a0) #0 {
b0:
br label %b1
for.body:
%sum.02 = phi i32 [ 0, %entry ], [ %add, %for.body ]
%arrayidx.phi = phi i32* [ %b, %entry ], [ %arrayidx.inc, %for.body ]
%i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%0 = load i32, i32* %arrayidx.phi, align 4
%add = add nsw i32 %0, %sum.02
%inc = add nsw i32 %i.01, 1
%exitcond = icmp eq i32 %inc, 40
%arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
br i1 %exitcond, label %for.end, label %for.body
b1: ; preds = %b1, %b0
%v0 = phi i32 [ 0, %b0 ], [ %v4, %b1 ]
%v1 = phi i32* [ %a0, %b0 ], [ %v7, %b1 ]
%v2 = phi i32 [ 0, %b0 ], [ %v5, %b1 ]
%v3 = load i32, i32* %v1, align 4
%v4 = add nsw i32 %v3, %v0
%v5 = add nsw i32 %v2, 1
%v6 = icmp eq i32 %v5, 40
%v7 = getelementptr i32, i32* %v1, i32 1
br i1 %v6, label %b2, label %b1
for.end:
ret i32 %add
b2: ; preds = %b1
ret i32 %v4
}
; This test checks that we don't remove the induction variable since it's used.
; CHECK-LABEL: f2:
; CHECK: loop0
; CHECK: r{{[0-9]+}} = add(r{{[0-9]+}},#1)
; CHECK-NOT: cmp.eq
; CHECK: endloop0
define i32 @test3(i32* nocapture %b) nounwind {
entry:
br label %for.body
for.body:
%arrayidx.phi = phi i32* [ %b, %entry ], [ %arrayidx.inc, %for.body ]
%i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
store i32 %i.01, i32* %arrayidx.phi, align 4
%inc = add nsw i32 %i.01, 1
%exitcond = icmp eq i32 %inc, 40
%arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
br i1 %exitcond, label %for.end, label %for.body
define i32 @f2(i32* nocapture %a0) #1 {
b0:
br label %b1
for.end:
b1: ; preds = %b1, %b0
%v0 = phi i32* [ %a0, %b0 ], [ %v4, %b1 ]
%v1 = phi i32 [ 0, %b0 ], [ %v2, %b1 ]
store i32 %v1, i32* %v0, align 4
%v2 = add nsw i32 %v1, 1
%v3 = icmp eq i32 %v2, 40
%v4 = getelementptr i32, i32* %v0, i32 1
br i1 %v3, label %b2, label %b1
b2: ; preds = %b1
ret i32 0
}
attributes #0 = { nounwind readonly "target-cpu"="hexagonv5" }
attributes #1 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,27 +1,27 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 -O2 < %s | FileCheck %s
; ModuleID = 'hwloop-const.c'
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: endloop
target triple = "hexagon-unknown-linux-gnu"
@b = common global [25000 x i32] zeroinitializer, align 8
@a = common global [25000 x i32] zeroinitializer, align 8
@c = common global [25000 x i32] zeroinitializer, align 8
@g0 = common global [25000 x i32] zeroinitializer, align 8
@g1 = common global [25000 x i32] zeroinitializer, align 8
define i32 @hwloop_bug() nounwind {
entry:
br label %for.body
define i32 @f0() #0 {
b0:
br label %b1
; CHECK: endloop
for.body: ; preds = %for.body, %entry
%i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds [25000 x i32], [25000 x i32]* @b, i32 0, i32 %i.02
store i32 %i.02, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds [25000 x i32], [25000 x i32]* @a, i32 0, i32 %i.02
store i32 %i.02, i32* %arrayidx1, align 4
%inc = add nsw i32 %i.02, 1
%exitcond = icmp eq i32 %inc, 25000
br i1 %exitcond, label %for.end, label %for.body
b1: ; preds = %b1, %b0
%v0 = phi i32 [ 0, %b0 ], [ %v3, %b1 ]
%v1 = getelementptr inbounds [25000 x i32], [25000 x i32]* @g0, i32 0, i32 %v0
store i32 %v0, i32* %v1, align 4
%v2 = getelementptr inbounds [25000 x i32], [25000 x i32]* @g1, i32 0, i32 %v0
store i32 %v0, i32* %v2, align 4
%v3 = add nsw i32 %v0, 1
%v4 = icmp eq i32 %v3, 25000
br i1 %v4, label %b2, label %b1
for.end: ; preds = %for.body
b2: ; preds = %b1
ret i32 0
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,63 +1,64 @@
; RUN: llc < %s -march=hexagon -mcpu=hexagonv4 -O2 -disable-lsr | FileCheck %s
; ModuleID = 'hwloop-dbg.o'
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
target triple = "hexagon"
; RUN: llc < %s -march=hexagon -disable-lsr | FileCheck %s
define void @foo(i32* nocapture %a, i32* nocapture %b) nounwind !dbg !5 {
entry:
tail call void @llvm.dbg.value(metadata i32* %a, i64 0, metadata !13, metadata !DIExpression()), !dbg !17
tail call void @llvm.dbg.value(metadata i32* %b, i64 0, metadata !14, metadata !DIExpression()), !dbg !18
tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !15, metadata !DIExpression()), !dbg !19
br label %for.body, !dbg !19
for.body: ; preds = %for.body, %entry
; CHECK: loop0(
; CHECK-NOT: add({{r[0-9]*}}, #
; CHECK: endloop0
%arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
%i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%b.addr.01 = phi i32* [ %b, %entry ], [ %incdec.ptr, %for.body ]
%incdec.ptr = getelementptr inbounds i32, i32* %b.addr.01, i32 1, !dbg !21
tail call void @llvm.dbg.value(metadata i32* %incdec.ptr, i64 0, metadata !14, metadata !DIExpression()), !dbg !21
%0 = load i32, i32* %b.addr.01, align 4, !dbg !21
store i32 %0, i32* %arrayidx.phi, align 4, !dbg !21
%inc = add nsw i32 %i.02, 1, !dbg !26
tail call void @llvm.dbg.value(metadata i32 %inc, i64 0, metadata !15, metadata !DIExpression()), !dbg !26
%exitcond = icmp eq i32 %inc, 10, !dbg !19
%arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
br i1 %exitcond, label %for.end, label %for.body, !dbg !19
for.end: ; preds = %for.body
ret void, !dbg !27
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
target triple = "hexagon"
define void @f0(i32* nocapture %a0, i32* nocapture %a1) #0 !dbg !4 {
b0:
call void @llvm.dbg.value(metadata i32* %a0, metadata !10, metadata !DIExpression()), !dbg !14
call void @llvm.dbg.value(metadata i32* %a1, metadata !11, metadata !DIExpression()), !dbg !15
call void @llvm.dbg.value(metadata i32 0, metadata !12, metadata !DIExpression()), !dbg !16
br label %b1, !dbg !16
b1: ; preds = %b1, %b0
%v0 = phi i32* [ %a0, %b0 ], [ %v7, %b1 ]
%v1 = phi i32 [ 0, %b0 ], [ %v5, %b1 ]
%v2 = phi i32* [ %a1, %b0 ], [ %v3, %b1 ]
%v3 = getelementptr inbounds i32, i32* %v2, i32 1, !dbg !18
call void @llvm.dbg.value(metadata i32* %v3, metadata !11, metadata !DIExpression()), !dbg !18
%v4 = load i32, i32* %v2, align 4, !dbg !18
store i32 %v4, i32* %v0, align 4, !dbg !18
%v5 = add nsw i32 %v1, 1, !dbg !20
call void @llvm.dbg.value(metadata i32 %v5, metadata !12, metadata !DIExpression()), !dbg !20
%v6 = icmp eq i32 %v5, 10, !dbg !16
%v7 = getelementptr i32, i32* %v0, i32 1
br i1 %v6, label %b2, label %b1, !dbg !16
b2: ; preds = %b1
ret void, !dbg !21
}
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
attributes #0 = { nounwind "target-cpu"="hexagonv5" }
attributes #1 = { nounwind readnone speculatable }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!29}
!llvm.module.flags = !{!3}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "QuIC LLVM Hexagon Clang version 6.1-pre-unknown, (git://git-hexagon-aus.quicinc.com/llvm/clang-mainline.git e9382867661454cdf44addb39430741578e9765c) (llvm/llvm-mainline.git 36412bb1fcf03ed426d4437b41198bae066675ac)", isOptimized: true, emissionKind: FullDebug, file: !28, enums: !2, retainedTypes: !2, globals: !2)
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "QuIC LLVM Hexagon Clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, globals: !2)
!1 = !DIFile(filename: "hwloop-dbg.c", directory: "/test")
!2 = !{}
!5 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 1, file: !28, scope: null, type: !7, retainedNodes: !11)
!6 = !DIFile(filename: "hwloop-dbg.c", directory: "/usr2/kparzysz/s.hex/t")
!7 = !DISubroutineType(types: !8)
!8 = !{null, !9, !9}
!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !10)
!10 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!11 = !{!13, !14, !15}
!13 = !DILocalVariable(name: "a", line: 1, arg: 1, scope: !5, file: !6, type: !9)
!14 = !DILocalVariable(name: "b", line: 1, arg: 2, scope: !5, file: !6, type: !9)
!15 = !DILocalVariable(name: "i", line: 2, scope: !16, file: !6, type: !10)
!16 = distinct !DILexicalBlock(line: 1, column: 26, file: !28, scope: !5)
!17 = !DILocation(line: 1, column: 15, scope: !5)
!18 = !DILocation(line: 1, column: 23, scope: !5)
!19 = !DILocation(line: 3, column: 8, scope: !20)
!20 = distinct !DILexicalBlock(line: 3, column: 3, file: !28, scope: !16)
!21 = !DILocation(line: 4, column: 5, scope: !22)
!22 = distinct !DILexicalBlock(line: 3, column: 28, file: !28, scope: !20)
!26 = !DILocation(line: 3, column: 23, scope: !20)
!27 = !DILocation(line: 6, column: 1, scope: !16)
!28 = !DIFile(filename: "hwloop-dbg.c", directory: "/usr2/kparzysz/s.hex/t")
!29 = !{i32 1, !"Debug Info Version", i32 3}
!30 = !{i32 0}
!3 = !{i32 1, !"Debug Info Version", i32 3}
!4 = distinct !DISubprogram(name: "foo", scope: null, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !9)
!5 = !DISubroutineType(types: !6)
!6 = !{null, !7, !7}
!7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 32, align: 32)
!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !{!10, !11, !12}
!10 = !DILocalVariable(name: "a", arg: 1, scope: !4, file: !1, line: 1, type: !7)
!11 = !DILocalVariable(name: "b", arg: 2, scope: !4, file: !1, line: 1, type: !7)
!12 = !DILocalVariable(name: "i", scope: !13, file: !1, line: 2, type: !8)
!13 = distinct !DILexicalBlock(scope: !4, file: !1, line: 1, column: 26)
!14 = !DILocation(line: 1, column: 15, scope: !4)
!15 = !DILocation(line: 1, column: 23, scope: !4)
!16 = !DILocation(line: 3, column: 8, scope: !17)
!17 = distinct !DILexicalBlock(scope: !13, file: !1, line: 3, column: 3)
!18 = !DILocation(line: 4, column: 5, scope: !19)
!19 = distinct !DILexicalBlock(scope: !17, file: !1, line: 3, column: 28)
!20 = !DILocation(line: 3, column: 23, scope: !17)
!21 = !DILocation(line: 6, column: 1, scope: !13)

View File

@ -1,438 +1,408 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
; RUN: llc -march=hexagon -mcpu=hexagonv5 -O3 < %s | FileCheck %s
; CHECK: test_pos1_ir_sle
; CHECK-LABEL: f0:
; CHECK: loop0
; a < b
define void @test_pos1_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp sle i32 28395, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f0(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp sle i32 28395, %a2
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 28395, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 1
%cmp = icmp sle i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ 28395, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 1
%v8 = icmp sle i32 %v7, %a2
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos2_ir_sle
; CHECK-LABEL: f1:
; CHECK: loop0
; a < b
define void @test_pos2_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp sle i32 9073, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f1(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp sle i32 9073, %a2
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 9073, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 2
%cmp = icmp sle i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ 9073, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 2
%v8 = icmp sle i32 %v7, %a2
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos4_ir_sle
; CHECK-LABEL: f2:
; CHECK: loop0
; a < b
define void @test_pos4_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp sle i32 21956, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f2(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp sle i32 21956, %a2
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 21956, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 4
%cmp = icmp sle i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ 21956, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 4
%v8 = icmp sle i32 %v7, %a2
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos8_ir_sle
; CHECK-LABEL: f3:
; CHECK: loop0
; a < b
define void @test_pos8_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp sle i32 16782, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f3(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp sle i32 16782, %a2
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 16782, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 8
%cmp = icmp sle i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ 16782, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 8
%v8 = icmp sle i32 %v7, %a2
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos16_ir_sle
; CHECK-LABEL: f4:
; CHECK: loop0
; a < b
define void @test_pos16_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp sle i32 19097, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f4(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp sle i32 19097, %a2
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 19097, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 16
%cmp = icmp sle i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ 19097, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 16
%v8 = icmp sle i32 %v7, %a2
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos1_ri_sle
; CHECK-LABEL: f5:
; CHECK: loop0
; a < b
define void @test_pos1_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp sle i32 %a, 14040
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f5(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp sle i32 %a1, 14040
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 1
%cmp = icmp sle i32 %inc, 14040
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 1
%v8 = icmp sle i32 %v7, 14040
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos2_ri_sle
; CHECK-LABEL: f6:
; CHECK: loop0
; a < b
define void @test_pos2_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp sle i32 %a, 13710
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f6(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp sle i32 %a1, 13710
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 2
%cmp = icmp sle i32 %inc, 13710
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 2
%v8 = icmp sle i32 %v7, 13710
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos4_ri_sle
; CHECK-LABEL: f7:
; CHECK: loop0
; a < b
define void @test_pos4_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp sle i32 %a, 9920
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f7(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp sle i32 %a1, 9920
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 4
%cmp = icmp sle i32 %inc, 9920
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 4
%v8 = icmp sle i32 %v7, 9920
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos8_ri_sle
; CHECK-LABEL: f8:
; CHECK: loop0
; a < b
define void @test_pos8_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp sle i32 %a, 18924
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f8(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp sle i32 %a1, 18924
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 8
%cmp = icmp sle i32 %inc, 18924
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 8
%v8 = icmp sle i32 %v7, 18924
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos16_ri_sle
; CHECK-LABEL: f9:
; CHECK: loop0
; a < b
define void @test_pos16_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp sle i32 %a, 11812
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f9(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp sle i32 %a1, 11812
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 16
%cmp = icmp sle i32 %inc, 11812
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 16
%v8 = icmp sle i32 %v7, 11812
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos1_rr_sle
; CHECK-LABEL: f10:
; CHECK: loop0
; a < b
define void @test_pos1_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp sle i32 %a, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f10(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp sle i32 %a1, %a2
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 1
%cmp = icmp sle i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 1
%v8 = icmp sle i32 %v7, %a2
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos2_rr_sle
; CHECK-LABEL: f11:
; CHECK: loop0
; a < b
define void @test_pos2_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp sle i32 %a, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f11(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp sle i32 %a1, %a2
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 2
%cmp = icmp sle i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 2
%v8 = icmp sle i32 %v7, %a2
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos4_rr_sle
; CHECK-LABEL: f12:
; CHECK: loop0
; a < b
define void @test_pos4_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp sle i32 %a, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f12(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp sle i32 %a1, %a2
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 4
%cmp = icmp sle i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 4
%v8 = icmp sle i32 %v7, %a2
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos8_rr_sle
; CHECK-LABEL: f13:
; CHECK: loop0
; a < b
define void @test_pos8_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp sle i32 %a, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f13(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp sle i32 %a1, %a2
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 8
%cmp = icmp sle i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 8
%v8 = icmp sle i32 %v7, %a2
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos16_rr_sle
; CHECK-LABEL: f14:
; CHECK: loop0
; a < b
define void @test_pos16_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp sle i32 %a, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f14(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp sle i32 %a1, %a2
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 16
%cmp = icmp sle i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 16
%v8 = icmp sle i32 %v7, %a2
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,438 +1,408 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
; RUN: llc -march=hexagon -O3 < %s | FileCheck %s
; CHECK: test_pos1_ir_ne
; CHECK-LABEL: f0:
; CHECK: loop0
; a < b
define void @test_pos1_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp slt i32 32623, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f0(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp slt i32 32623, %a2
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 32623, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 1
%cmp = icmp ne i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ 32623, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 1
%v8 = icmp ne i32 %v7, %a2
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos2_ir_ne
; CHECK-LABEL: f1:
; CHECK: loop0
; a < b
define void @test_pos2_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp slt i32 29554, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f1(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp slt i32 29554, %a2
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 29554, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 2
%cmp = icmp ne i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ 29554, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 2
%v8 = icmp ne i32 %v7, %a2
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos4_ir_ne
; CHECK-LABEL: f2:
; CHECK: loop0
; a < b
define void @test_pos4_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp slt i32 15692, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f2(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp slt i32 15692, %a2
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 15692, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 4
%cmp = icmp ne i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ 15692, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 4
%v8 = icmp ne i32 %v7, %a2
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos8_ir_ne
; CHECK-LABEL: f3:
; CHECK: loop0
; a < b
define void @test_pos8_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp slt i32 10449, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f3(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp slt i32 10449, %a2
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 10449, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 8
%cmp = icmp ne i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ 10449, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 8
%v8 = icmp ne i32 %v7, %a2
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos16_ir_ne
; CHECK-LABEL: f4:
; CHECK: loop0
; a < b
define void @test_pos16_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp slt i32 32087, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f4(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp slt i32 32087, %a2
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 32087, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 16
%cmp = icmp ne i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ 32087, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 16
%v8 = icmp ne i32 %v7, %a2
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos1_ri_ne
; CHECK-LABEL: f5:
; CHECK: loop0
; a < b
define void @test_pos1_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp slt i32 %a, 3472
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f5(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp slt i32 %a1, 3472
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 1
%cmp = icmp ne i32 %inc, 3472
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 1
%v8 = icmp ne i32 %v7, 3472
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos2_ri_ne
; CHECK-LABEL: f6:
; CHECK: loop0
; a < b
define void @test_pos2_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp slt i32 %a, 8730
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f6(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp slt i32 %a1, 8730
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 2
%cmp = icmp ne i32 %inc, 8730
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 2
%v8 = icmp ne i32 %v7, 8730
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos4_ri_ne
; CHECK-LABEL: f7:
; CHECK: loop0
; a < b
define void @test_pos4_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp slt i32 %a, 1493
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f7(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp slt i32 %a1, 1493
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 4
%cmp = icmp ne i32 %inc, 1493
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 4
%v8 = icmp ne i32 %v7, 1493
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos8_ri_ne
; CHECK-LABEL: f8:
; CHECK: loop0
; a < b
define void @test_pos8_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp slt i32 %a, 1706
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f8(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp slt i32 %a1, 1706
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 8
%cmp = icmp ne i32 %inc, 1706
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 8
%v8 = icmp ne i32 %v7, 1706
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos16_ri_ne
; CHECK-LABEL: f9:
; CHECK: loop0
; a < b
define void @test_pos16_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp slt i32 %a, 1886
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f9(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp slt i32 %a1, 1886
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 16
%cmp = icmp ne i32 %inc, 1886
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 16
%v8 = icmp ne i32 %v7, 1886
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos1_rr_ne
; CHECK-LABEL: f10:
; CHECK: loop0
; a < b
define void @test_pos1_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp slt i32 %a, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f10(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp slt i32 %a1, %a2
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 1
%cmp = icmp ne i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 1
%v8 = icmp ne i32 %v7, %a2
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos2_rr_ne
; CHECK-LABEL: f11:
; CHECK: loop0
; a < b
define void @test_pos2_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp slt i32 %a, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f11(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp slt i32 %a1, %a2
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 2
%cmp = icmp ne i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 2
%v8 = icmp ne i32 %v7, %a2
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos4_rr_ne
; CHECK-LABEL: f12:
; CHECK: loop0
; a < b
define void @test_pos4_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp slt i32 %a, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f12(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp slt i32 %a1, %a2
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 4
%cmp = icmp ne i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 4
%v8 = icmp ne i32 %v7, %a2
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos8_rr_ne
; CHECK-LABEL: f13
; CHECK: loop0
; a < b
define void @test_pos8_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp slt i32 %a, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f13(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp slt i32 %a1, %a2
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 8
%cmp = icmp ne i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 8
%v8 = icmp ne i32 %v7, %a2
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
; CHECK: test_pos16_rr_ne
; CHECK-LABEL: f14
; CHECK: loop0
; a < b
define void @test_pos16_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp3 = icmp slt i32 %a, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
define void @f14(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp slt i32 %a1, %a2
br i1 %v0, label %b1, label %b3
for.body.lr.ph: ; preds = %entry
br label %for.body
b1: ; preds = %b0
br label %b2
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
store i8 %conv1, i8* %arrayidx, align 1
%inc = add nsw i32 %i.04, 16
%cmp = icmp ne i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
b2: ; preds = %b2, %b1
%v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
%v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
%v3 = load i8, i8* %v2, align 1
%v4 = zext i8 %v3 to i32
%v5 = add nsw i32 %v4, 1
%v6 = trunc i32 %v5 to i8
store i8 %v6, i8* %v2, align 1
%v7 = add nsw i32 %v1, 16
%v8 = icmp ne i32 %v7, %a2
br i1 %v8, label %b2, label %b3
for.end: ; preds = %for.body, %entry
b3: ; preds = %b2, %b0
ret void
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,40 +1,36 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; CHECK: call __hexagon_{{[A-Z_a-z0-9]+}}
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: dfcmp
@a_str = internal constant [8 x i8] c"a = %f\0A\00"
@b_str = internal constant [8 x i8] c"b = %f\0A\00"
@add_str = internal constant [12 x i8] c"a + b = %f\0A\00"
@sub_str = internal constant [12 x i8] c"a - b = %f\0A\00"
@mul_str = internal constant [12 x i8] c"a * b = %f\0A\00"
@div_str = internal constant [12 x i8] c"b / a = %f\0A\00"
@rem_str = internal constant [13 x i8] c"b %% a = %f\0A\00"
@lt_str = internal constant [12 x i8] c"a < b = %d\0A\00"
@le_str = internal constant [13 x i8] c"a <= b = %d\0A\00"
@gt_str = internal constant [12 x i8] c"a > b = %d\0A\00"
@ge_str = internal constant [13 x i8] c"a >= b = %d\0A\00"
@eq_str = internal constant [13 x i8] c"a == b = %d\0A\00"
@ne_str = internal constant [13 x i8] c"a != b = %d\0A\00"
@A = global double 2.000000e+00
@B = global double 5.000000e+00
@g0 = internal constant [12 x i8] c"a < b = %d\0A\00"
@g1 = internal constant [13 x i8] c"a <= b = %d\0A\00"
@g2 = internal constant [12 x i8] c"a > b = %d\0A\00"
@g3 = internal constant [13 x i8] c"a >= b = %d\0A\00"
@g4 = internal constant [13 x i8] c"a == b = %d\0A\00"
@g5 = internal constant [13 x i8] c"a != b = %d\0A\00"
@g6 = global double 2.000000e+00
@g7 = global double 5.000000e+00
declare i32 @printf(i8*, ...)
declare i32 @f0(i8*, ...) #0
define i32 @main() {
%a = load double, double* @A
%b = load double, double* @B
%lt_r = fcmp olt double %a, %b
%le_r = fcmp ole double %a, %b
%gt_r = fcmp ogt double %a, %b
%ge_r = fcmp oge double %a, %b
%eq_r = fcmp oeq double %a, %b
%ne_r = fcmp une double %a, %b
%val1 = zext i1 %lt_r to i16
%lt_s = getelementptr [12 x i8], [12 x i8]* @lt_str, i64 0, i64 0
%le_s = getelementptr [13 x i8], [13 x i8]* @le_str, i64 0, i64 0
%gt_s = getelementptr [12 x i8], [12 x i8]* @gt_str, i64 0, i64 0
%ge_s = getelementptr [13 x i8], [13 x i8]* @ge_str, i64 0, i64 0
%eq_s = getelementptr [13 x i8], [13 x i8]* @eq_str, i64 0, i64 0
%ne_s = getelementptr [13 x i8], [13 x i8]* @ne_str, i64 0, i64 0
call i32 (i8*, ...) @printf( i8* %lt_s, i16 %val1 )
ret i32 0
define i32 @f1() #0 {
b0:
%v0 = load double, double* @g6
%v1 = load double, double* @g7
%v2 = fcmp olt double %v0, %v1
%v3 = fcmp ole double %v0, %v1
%v4 = fcmp ogt double %v0, %v1
%v5 = fcmp oge double %v0, %v1
%v6 = fcmp oeq double %v0, %v1
%v7 = fcmp une double %v0, %v1
%v8 = zext i1 %v2 to i16
%v9 = getelementptr [12 x i8], [12 x i8]* @g0, i64 0, i64 0
%v10 = getelementptr [13 x i8], [13 x i8]* @g1, i64 0, i64 0
%v11 = getelementptr [12 x i8], [12 x i8]* @g2, i64 0, i64 0
%v12 = getelementptr [13 x i8], [13 x i8]* @g3, i64 0, i64 0
%v13 = getelementptr [13 x i8], [13 x i8]* @g4, i64 0, i64 0
%v14 = getelementptr [13 x i8], [13 x i8]* @g5, i64 0, i64 0
%v15 = call i32 (i8*, ...) @f0(i8* %v9, i16 %v8)
ret i32 0
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,44 +1,40 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; CHECK: call __hexagon_{{[_A-Za-z0-9]+}}
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: dfcmp
@a_str = internal constant [8 x i8] c"a = %f\0A\00"
@b_str = internal constant [8 x i8] c"b = %f\0A\00"
@add_str = internal constant [12 x i8] c"a + b = %f\0A\00"
@sub_str = internal constant [12 x i8] c"a - b = %f\0A\00"
@mul_str = internal constant [12 x i8] c"a * b = %f\0A\00"
@div_str = internal constant [12 x i8] c"b / a = %f\0A\00"
@rem_str = internal constant [13 x i8] c"b %% a = %f\0A\00"
@lt_str = internal constant [12 x i8] c"a < b = %d\0A\00"
@le_str = internal constant [13 x i8] c"a <= b = %d\0A\00"
@gt_str = internal constant [12 x i8] c"a > b = %d\0A\00"
@ge_str = internal constant [13 x i8] c"a >= b = %d\0A\00"
@eq_str = internal constant [13 x i8] c"a == b = %d\0A\00"
@ne_str = internal constant [13 x i8] c"a != b = %d\0A\00"
@A = global double 2.000000e+00
@B = global double 5.000000e+00
@g0 = internal constant [12 x i8] c"a < b = %d\0A\00"
@g1 = internal constant [13 x i8] c"a <= b = %d\0A\00"
@g2 = internal constant [12 x i8] c"a > b = %d\0A\00"
@g3 = internal constant [13 x i8] c"a >= b = %d\0A\00"
@g4 = internal constant [13 x i8] c"a == b = %d\0A\00"
@g5 = internal constant [13 x i8] c"a != b = %d\0A\00"
@g6 = global double 2.000000e+00
@g7 = global double 5.000000e+00
declare i32 @printf(i8*, ...)
declare i32 @f0(i8*, ...) #0
define i32 @main() {
%a = load double, double* @A
%b = load double, double* @B
%lt_r = fcmp olt double %a, %b
%le_r = fcmp ole double %a, %b
%gt_r = fcmp ogt double %a, %b
%ge_r = fcmp oge double %a, %b
%eq_r = fcmp oeq double %a, %b
%ne_r = fcmp une double %a, %b
%lt_s = getelementptr [12 x i8], [12 x i8]* @lt_str, i64 0, i64 0
%le_s = getelementptr [13 x i8], [13 x i8]* @le_str, i64 0, i64 0
%gt_s = getelementptr [12 x i8], [12 x i8]* @gt_str, i64 0, i64 0
%ge_s = getelementptr [13 x i8], [13 x i8]* @ge_str, i64 0, i64 0
%eq_s = getelementptr [13 x i8], [13 x i8]* @eq_str, i64 0, i64 0
%ne_s = getelementptr [13 x i8], [13 x i8]* @ne_str, i64 0, i64 0
call i32 (i8*, ...) @printf( i8* %lt_s, i1 %lt_r )
call i32 (i8*, ...) @printf( i8* %le_s, i1 %le_r )
call i32 (i8*, ...) @printf( i8* %gt_s, i1 %gt_r )
call i32 (i8*, ...) @printf( i8* %ge_s, i1 %ge_r )
call i32 (i8*, ...) @printf( i8* %eq_s, i1 %eq_r )
call i32 (i8*, ...) @printf( i8* %ne_s, i1 %ne_r )
ret i32 0
define i32 @f1() #0 {
b0:
%v0 = load double, double* @g6
%v1 = load double, double* @g7
%v2 = fcmp olt double %v0, %v1
%v3 = fcmp ole double %v0, %v1
%v4 = fcmp ogt double %v0, %v1
%v5 = fcmp oge double %v0, %v1
%v6 = fcmp oeq double %v0, %v1
%v7 = fcmp une double %v0, %v1
%v8 = getelementptr [12 x i8], [12 x i8]* @g0, i64 0, i64 0
%v9 = getelementptr [13 x i8], [13 x i8]* @g1, i64 0, i64 0
%v10 = getelementptr [12 x i8], [12 x i8]* @g2, i64 0, i64 0
%v11 = getelementptr [13 x i8], [13 x i8]* @g3, i64 0, i64 0
%v12 = getelementptr [13 x i8], [13 x i8]* @g4, i64 0, i64 0
%v13 = getelementptr [13 x i8], [13 x i8]* @g5, i64 0, i64 0
%v14 = call i32 (i8*, ...) @f0(i8* %v8, i1 %v2)
%v15 = call i32 (i8*, ...) @f0(i8* %v9, i1 %v3)
%v16 = call i32 (i8*, ...) @f0(i8* %v10, i1 %v4)
%v17 = call i32 (i8*, ...) @f0(i8* %v11, i1 %v5)
%v18 = call i32 (i8*, ...) @f0(i8* %v12, i1 %v6)
%v19 = call i32 (i8*, ...) @f0(i8* %v13, i1 %v7)
ret i32 0
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,40 +1,36 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; CHECK: call __hexagon_{{[A-Z_a-z0-9]+}}
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: dfcmp
@a_str = internal constant [8 x i8] c"a = %f\0A\00"
@b_str = internal constant [8 x i8] c"b = %f\0A\00"
@add_str = internal constant [12 x i8] c"a + b = %f\0A\00"
@sub_str = internal constant [12 x i8] c"a - b = %f\0A\00"
@mul_str = internal constant [12 x i8] c"a * b = %f\0A\00"
@div_str = internal constant [12 x i8] c"b / a = %f\0A\00"
@rem_str = internal constant [13 x i8] c"b %% a = %f\0A\00"
@lt_str = internal constant [12 x i8] c"a < b = %d\0A\00"
@le_str = internal constant [13 x i8] c"a <= b = %d\0A\00"
@gt_str = internal constant [12 x i8] c"a > b = %d\0A\00"
@ge_str = internal constant [13 x i8] c"a >= b = %d\0A\00"
@eq_str = internal constant [13 x i8] c"a == b = %d\0A\00"
@ne_str = internal constant [13 x i8] c"a != b = %d\0A\00"
@A = global double 2.000000e+00
@B = global double 5.000000e+00
@g0 = internal constant [12 x i8] c"a < b = %d\0A\00"
@g1 = internal constant [13 x i8] c"a <= b = %d\0A\00"
@g2 = internal constant [12 x i8] c"a > b = %d\0A\00"
@g3 = internal constant [13 x i8] c"a >= b = %d\0A\00"
@g4 = internal constant [13 x i8] c"a == b = %d\0A\00"
@g5 = internal constant [13 x i8] c"a != b = %d\0A\00"
@g6 = global double 2.000000e+00
@g7 = global double 5.000000e+00
declare i32 @printf(i8*, ...)
declare i32 @f0(i8*, ...) #0
define i32 @main() {
%a = load double, double* @A
%b = load double, double* @B
%lt_r = fcmp olt double %a, %b
%le_r = fcmp ole double %a, %b
%gt_r = fcmp ogt double %a, %b
%ge_r = fcmp oge double %a, %b
%eq_r = fcmp oeq double %a, %b
%ne_r = fcmp une double %a, %b
%val1 = zext i1 %lt_r to i8
%lt_s = getelementptr [12 x i8], [12 x i8]* @lt_str, i64 0, i64 0
%le_s = getelementptr [13 x i8], [13 x i8]* @le_str, i64 0, i64 0
%gt_s = getelementptr [12 x i8], [12 x i8]* @gt_str, i64 0, i64 0
%ge_s = getelementptr [13 x i8], [13 x i8]* @ge_str, i64 0, i64 0
%eq_s = getelementptr [13 x i8], [13 x i8]* @eq_str, i64 0, i64 0
%ne_s = getelementptr [13 x i8], [13 x i8]* @ne_str, i64 0, i64 0
call i32 (i8*, ...) @printf( i8* %lt_s, i8 %val1 )
ret i32 0
define i32 @f1() #0 {
b0:
%v0 = load double, double* @g6
%v1 = load double, double* @g7
%v2 = fcmp olt double %v0, %v1
%v3 = fcmp ole double %v0, %v1
%v4 = fcmp ogt double %v0, %v1
%v5 = fcmp oge double %v0, %v1
%v6 = fcmp oeq double %v0, %v1
%v7 = fcmp une double %v0, %v1
%v8 = zext i1 %v2 to i8
%v9 = getelementptr [12 x i8], [12 x i8]* @g0, i64 0, i64 0
%v10 = getelementptr [13 x i8], [13 x i8]* @g1, i64 0, i64 0
%v11 = getelementptr [12 x i8], [12 x i8]* @g2, i64 0, i64 0
%v12 = getelementptr [13 x i8], [13 x i8]* @g3, i64 0, i64 0
%v13 = getelementptr [13 x i8], [13 x i8]* @g4, i64 0, i64 0
%v14 = getelementptr [13 x i8], [13 x i8]* @g5, i64 0, i64 0
%v15 = call i32 (i8*, ...) @f0(i8* %v9, i8 %v8)
ret i32 0
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,14 +1,15 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; RUN: llc -march=hexagon < %s | FileCheck %s
; Check that we generate integer multiply accumulate.
; CHECK: r{{[0-9]+}} {{\+|\-}}= mpyi(r{{[0-9]+}},
define i32 @main(i32* %a, i32* %b) nounwind {
entry:
%0 = load i32, i32* %a, align 4
%div = udiv i32 %0, 10000
%rem = urem i32 %div, 10
store i32 %rem, i32* %b, align 4
define i32 @f0(i32* %a0, i32* %a1) #0 {
b0:
%v0 = load i32, i32* %a0, align 4
%v1 = udiv i32 %v0, 10000
%v2 = urem i32 %v1, 10
store i32 %v2, i32* %a1, align 4
ret i32 0
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,16 +1,19 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s
; RUN: llc -march=hexagon < %s
; Check that the mis-aligned load doesn't cause compiler to assert.
declare i32 @_hi(i64) #1
@temp1 = common global i32 0, align 4
@g0 = common global i32 0, align 4
define i32 @CSDRSEARCH_executeSearchManager() #0 {
entry:
%temp = alloca i32, align 4
%0 = load i32, i32* @temp1, align 4
store i32 %0, i32* %temp, align 4
%1 = bitcast i32* %temp to i64*
%2 = load i64, i64* %1, align 8
%call = call i32 @_hi(i64 %2)
ret i32 %call
declare i32 @f0(i64) #0
define i32 @f1() #0 {
b0:
%v0 = alloca i32, align 4
%v1 = load i32, i32* @g0, align 4
store i32 %v1, i32* %v0, align 4
%v2 = bitcast i32* %v0 to i64*
%v3 = load i64, i64* %v2, align 8
%v4 = call i32 @f0(i64 %v3)
ret i32 %v4
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,19 +1,21 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: += mpyi
define void @foo(i32 %acc, i32 %num, i32 %num2) nounwind {
entry:
%acc.addr = alloca i32, align 4
%num.addr = alloca i32, align 4
%num2.addr = alloca i32, align 4
store i32 %acc, i32* %acc.addr, align 4
store i32 %num, i32* %num.addr, align 4
store i32 %num2, i32* %num2.addr, align 4
%0 = load i32, i32* %num.addr, align 4
%1 = load i32, i32* %acc.addr, align 4
%mul = mul nsw i32 %0, %1
%2 = load i32, i32* %num2.addr, align 4
%add = add nsw i32 %mul, %2
store i32 %add, i32* %num.addr, align 4
define void @f0(i32 %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = alloca i32, align 4
%v1 = alloca i32, align 4
%v2 = alloca i32, align 4
store i32 %a0, i32* %v0, align 4
store i32 %a1, i32* %v1, align 4
store i32 %a2, i32* %v2, align 4
%v3 = load i32, i32* %v1, align 4
%v4 = load i32, i32* %v0, align 4
%v5 = mul nsw i32 %v3, %v4
%v6 = load i32, i32* %v2, align 4
%v7 = add nsw i32 %v5, %v6
store i32 %v7, i32* %v1, align 4
ret void
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,33 +1,36 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; RUN: llc -march=hexagon < %s | FileCheck %s
; Check that we generate new value jump.
@i = global i32 0, align 4
@j = global i32 10, align 4
define i32 @foo(i32 %a) nounwind {
entry:
; CHECK: if (cmp.eq(r{{[0-9]+}}.new,#0)) jump{{.}}
%addr1 = alloca i32, align 4
%addr2 = alloca i32, align 4
%0 = load i32, i32* @i, align 4
store i32 %0, i32* %addr1, align 4
call void @bar(i32 1, i32 2)
%1 = load i32, i32* @j, align 4
%tobool = icmp ne i32 %1, 0
br i1 %tobool, label %if.then, label %if.else
if.then:
call void @baz(i32 1, i32 2)
br label %if.end
@g0 = global i32 0, align 4
@g1 = global i32 10, align 4
if.else:
call void @guy(i32 10, i32 20)
br label %if.end
define i32 @f0(i32 %a0) #0 {
b0:
%v0 = alloca i32, align 4
%v1 = alloca i32, align 4
%v2 = load i32, i32* @g0, align 4
store i32 %v2, i32* %v0, align 4
call void @f2(i32 1, i32 2)
%v3 = load i32, i32* @g1, align 4
%v4 = icmp ne i32 %v3, 0
br i1 %v4, label %b1, label %b2
if.end:
b1: ; preds = %b0
call void @f3(i32 1, i32 2)
br label %b3
b2: ; preds = %b0
call void @f1(i32 10, i32 20)
br label %b3
b3: ; preds = %b2, %b1
ret i32 0
}
declare void @guy(i32, i32)
declare void @bar(i32, i32)
declare void @baz(i32, i32)
declare void @f1(i32, i32) #0
declare void @f2(i32, i32) #0
declare void @f3(i32, i32) #0
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,10 +1,8 @@
; RUN: llc -mcpu=hexagonv4 -tail-dup-size=1 < %s | FileCheck %s
; RUN: llc -march=hexagon -tail-dup-size=1 < %s | FileCheck %s
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
target triple = "hexagon-unknown--elf"
; Make sure we put the two conditionally executed adds in a packet.
; ifcnv_add:
; {
; p0 = cmp.gt(r2, r1)
; if (!p0.new) r0 = add(r2, r1)
@ -13,20 +11,23 @@ target triple = "hexagon-unknown--elf"
; CHECK: cmp
; CHECK-NEXT: add
; CHECK-NEXT: add
define i32 @ifcnv_add(i32, i32, i32) nounwind readnone {
%4 = icmp sgt i32 %2, %1
br i1 %4, label %5, label %7
define i32 @f0(i32 %a0, i32 %a1, i32 %a2) #0 {
b0:
%v0 = icmp sgt i32 %a2, %a1
br i1 %v0, label %b1, label %b2
; <label>:5 ; preds = %3
%6 = add nsw i32 %0, 10
br label %9
b1: ; preds = %b0
%v1 = add nsw i32 %a0, 10
br label %b3
; <label>:7 ; preds = %3
%8 = add nsw i32 %2, %1
br label %9
b2: ; preds = %b0
%v2 = add nsw i32 %a2, %a1
br label %b3
; <label>:9 ; preds = %7, %5
%10 = phi i32 [ %6, %5 ], [ %8, %7 ]
%11 = add nsw i32 %10, 1
ret i32 %11
b3: ; preds = %b2, %b1
%v3 = phi i32 [ %v1, %b1 ], [ %v2, %b2 ]
%v4 = add nsw i32 %v3, 1
ret i32 %v4
}
attributes #0 = { nounwind readnone "target-cpu"="hexagonv5" }

View File

@ -1,29 +1,30 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; RUN: llc -march=hexagon < %s | FileCheck %s
; Check that post-increment load instructions are being generated.
; CHECK: r{{[0-9]+}} = memw(r{{[0-9]+}}++#4)
define i32 @sum(i32* nocapture %a, i16* nocapture %b, i32 %n) nounwind {
entry:
br label %for.body
define i32 @f0(i32* nocapture %a0, i16* nocapture %a1, i32 %a2) #0 {
b0:
br label %b1
for.body:
%lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 10, %entry ]
%arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
%arrayidx1.phi = phi i16* [ %b, %entry ], [ %arrayidx1.inc, %for.body ]
%sum.03 = phi i32 [ 0, %entry ], [ %add2, %for.body ]
%0 = load i32, i32* %arrayidx.phi, align 4
%1 = load i16, i16* %arrayidx1.phi, align 2
%conv = sext i16 %1 to i32
%add = add i32 %0, %sum.03
%add2 = add i32 %add, %conv
%arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
%arrayidx1.inc = getelementptr i16, i16* %arrayidx1.phi, i32 1
%lsr.iv.next = add i32 %lsr.iv, -1
%exitcond = icmp eq i32 %lsr.iv.next, 0
br i1 %exitcond, label %for.end, label %for.body
b1: ; preds = %b1, %b0
%v0 = phi i32 [ %v11, %b1 ], [ 10, %b0 ]
%v1 = phi i32* [ %a0, %b0 ], [ %v9, %b1 ]
%v2 = phi i16* [ %a1, %b0 ], [ %v10, %b1 ]
%v3 = phi i32 [ 0, %b0 ], [ %v8, %b1 ]
%v4 = load i32, i32* %v1, align 4
%v5 = load i16, i16* %v2, align 2
%v6 = sext i16 %v5 to i32
%v7 = add i32 %v4, %v3
%v8 = add i32 %v7, %v6
%v9 = getelementptr i32, i32* %v1, i32 1
%v10 = getelementptr i16, i16* %v2, i32 1
%v11 = add i32 %v0, -1
%v12 = icmp eq i32 %v11, 0
br i1 %v12, label %b2, label %b1
for.end:
ret i32 %add2
b2: ; preds = %b1
ret i32 %v8
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,29 +1,30 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; RUN: llc -march=hexagon < %s | FileCheck %s
; Check that post-increment store instructions are being generated.
; CHECK: memw(r{{[0-9]+}}++#4) = r{{[0-9]+}}
define i32 @sum(i32* nocapture %a, i16* nocapture %b, i32 %n) nounwind {
entry:
br label %for.body
define i32 @f0(i32* nocapture %a0, i16* nocapture %a1, i32 %a2) #0 {
b0:
br label %b1
for.body: ; preds = %for.body, %entry
%lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 10, %entry ]
%arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
%arrayidx1.phi = phi i16* [ %b, %entry ], [ %arrayidx1.inc, %for.body ]
%0 = load i32, i32* %arrayidx.phi, align 4
%1 = load i16, i16* %arrayidx1.phi, align 2
%conv = sext i16 %1 to i32
%factor = mul i32 %0, 2
%add3 = add i32 %factor, %conv
store i32 %add3, i32* %arrayidx.phi, align 4
b1: ; preds = %b1, %b0
%v0 = phi i32 [ %v10, %b1 ], [ 10, %b0 ]
%v1 = phi i32* [ %a0, %b0 ], [ %v8, %b1 ]
%v2 = phi i16* [ %a1, %b0 ], [ %v9, %b1 ]
%v3 = load i32, i32* %v1, align 4
%v4 = load i16, i16* %v2, align 2
%v5 = sext i16 %v4 to i32
%v6 = mul i32 %v3, 2
%v7 = add i32 %v6, %v5
store i32 %v7, i32* %v1, align 4
%v8 = getelementptr i32, i32* %v1, i32 1
%v9 = getelementptr i16, i16* %v2, i32 1
%v10 = add i32 %v0, -1
%v11 = icmp eq i32 %v10, 0
br i1 %v11, label %b2, label %b1
%arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
%arrayidx1.inc = getelementptr i16, i16* %arrayidx1.phi, i32 1
%lsr.iv.next = add i32 %lsr.iv, -1
%exitcond = icmp eq i32 %lsr.iv.next, 0
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
b2: ; preds = %b1
ret i32 0
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,28 +1,30 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; RUN: llc -march=hexagon < %s | FileCheck %s
; Check that we are able to predicate instructions with gp-relative
; addressing mode.
@d = external global i32
@c = common global i32 0, align 4
; CHECK: if ({{!?}}p{{[0-3]+}}{{(.new)?}}) r{{[0-9]+}} = memw(##g{{[01]}})
; CHECK: if ({{!?}}p{{[0-3]+}}) r{{[0-9]+}} = memw(##g{{[01]}})
; Function Attrs: nounwind
define i32 @test2(i8 zeroext %a, i8 zeroext %b) #0 {
; CHECK: if ({{!?}}p{{[0-3]+}}{{(.new)?}}) r{{[0-9]+}} = memw(##{{[cd]}})
; CHECK: if ({{!?}}p{{[0-3]+}}) r{{[0-9]+}} = memw(##{{[cd]}})
entry:
%cmp = icmp eq i8 %a, %b
br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
@g0 = external global i32
@g1 = common global i32 0, align 4
entry.if.end_crit_edge:
%.pre = load i32, i32* @c, align 4
br label %if.end
define i32 @f0(i8 zeroext %a0, i8 zeroext %a1) #0 {
b0:
%v0 = icmp eq i8 %a0, %a1
br i1 %v0, label %b2, label %b1
if.then:
%0 = load i32, i32* @d, align 4
store i32 %0, i32* @c, align 4
br label %if.end
b1: ; preds = %b0
%v1 = load i32, i32* @g1, align 4
br label %b3
if.end:
%1 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ %0, %if.then ]
ret i32 %1
b2: ; preds = %b0
%v2 = load i32, i32* @g0, align 4
store i32 %v2, i32* @g1, align 4
br label %b3
b3: ; preds = %b2, %b1
%v3 = phi i32 [ %v1, %b1 ], [ %v2, %b2 ]
ret i32 %v3
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,30 +1,32 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; RUN: llc -march=hexagon < %s | FileCheck %s
; Check that we are able to predicate instructions.
; CHECK: if ({{!?}}p{{[0-3]}}{{(.new)?}}) r{{[0-9]+}} = {{and|aslh}}
; CHECK: if ({{!?}}p{{[0-3]}}{{(.new)?}}) r{{[0-9]+}} = {{and|aslh}}
@a = external global i32
@d = external global i32
; Function Attrs: nounwind
define i32 @test1(i8 zeroext %la, i8 zeroext %lb) {
entry:
%cmp = icmp eq i8 %la, %lb
br i1 %cmp, label %if.then, label %if.else
@g0 = external global i32
@g1 = external global i32
if.then: ; preds = %entry
%conv1 = zext i8 %la to i32
%shl = shl nuw nsw i32 %conv1, 16
br label %if.end
define i32 @f0(i8 zeroext %a0, i8 zeroext %a1) #0 {
b0:
%v0 = icmp eq i8 %a0, %a1
br i1 %v0, label %b1, label %b2
if.else: ; preds = %entry
%and8 = and i8 %lb, %la
%and = zext i8 %and8 to i32
br label %if.end
b1: ; preds = %b0
%v1 = zext i8 %a0 to i32
%v2 = shl nuw nsw i32 %v1, 16
br label %b3
if.end: ; preds = %if.else, %if.then
%storemerge = phi i32 [ %and, %if.else ], [ %shl, %if.then ]
store i32 %storemerge, i32* @a, align 4
%0 = load i32, i32* @d, align 4
ret i32 %0
b2: ; preds = %b0
%v3 = and i8 %a1, %a0
%v4 = zext i8 %v3 to i32
br label %b3
b3: ; preds = %b2, %b1
%v5 = phi i32 [ %v4, %b2 ], [ %v2, %b1 ]
store i32 %v5, i32* @g0, align 4
%v6 = load i32, i32* @g1, align 4
ret i32 %v6
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,8 +1,10 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
; RUN: llc -march=hexagon -O3 < %s | FileCheck %s
; CHECK: r{{[0-9]+}} = p{{[0-9]+}}
define i1 @foo() {
entry:
define i1 @f0() #0 {
b0:
ret i1 false
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,6 +1,6 @@
; Test fix for PR-13709.
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; CHECK: foo
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: f0
; CHECK-NOT: lsr(r{{[0-9]+}}:{{[0-9]+}}, #32)
; CHECK-NOT: lsr(r{{[0-9]+}}:{{[0-9]+}}, #32)
@ -13,64 +13,64 @@
; This makes the lsr instruction dead and it gets removed subsequently
; by a dead code removal pass.
%union.vect64 = type { i64 }
%union.vect32 = type { i32 }
define void @foo(%union.vect64* nocapture %sss_extracted_bit_rx_data_ptr,
%union.vect32* nocapture %s_even, %union.vect32* nocapture %s_odd,
i8* nocapture %scr_s_even_code_ptr, i8* nocapture %scr_s_odd_code_ptr)
nounwind {
entry:
%scevgep = getelementptr %union.vect64, %union.vect64* %sss_extracted_bit_rx_data_ptr, i32 1
%scevgep28 = getelementptr %union.vect32, %union.vect32* %s_odd, i32 1
%scevgep32 = getelementptr %union.vect32, %union.vect32* %s_even, i32 1
%scevgep36 = getelementptr i8, i8* %scr_s_odd_code_ptr, i32 1
%scevgep39 = getelementptr i8, i8* %scr_s_even_code_ptr, i32 1
br label %for.body
%s.0 = type { i64 }
%s.1 = type { i32 }
for.body: ; preds = %for.body, %entry
%lsr.iv42 = phi i32 [ %lsr.iv.next, %for.body ], [ 2, %entry ]
%lsr.iv40 = phi i8* [ %scevgep41, %for.body ], [ %scevgep39, %entry ]
%lsr.iv37 = phi i8* [ %scevgep38, %for.body ], [ %scevgep36, %entry ]
%lsr.iv33 = phi %union.vect32* [ %scevgep34, %for.body ], [ %scevgep32, %entry ]
%lsr.iv29 = phi %union.vect32* [ %scevgep30, %for.body ], [ %scevgep28, %entry ]
%lsr.iv = phi %union.vect64* [ %scevgep26, %for.body ], [ %scevgep, %entry ]
%predicate_1.023 = phi i8 [ undef, %entry ], [ %10, %for.body ]
%predicate.022 = phi i8 [ undef, %entry ], [ %9, %for.body ]
%val.021 = phi i64 [ undef, %entry ], [ %srcval, %for.body ]
%lsr.iv3335 = bitcast %union.vect32* %lsr.iv33 to i32*
%lsr.iv2931 = bitcast %union.vect32* %lsr.iv29 to i32*
%lsr.iv27 = bitcast %union.vect64* %lsr.iv to i64*
%0 = tail call i64 @llvm.hexagon.A2.vsubhs(i64 0, i64 %val.021)
%conv3 = sext i8 %predicate.022 to i32
%1 = trunc i64 %val.021 to i32
%2 = trunc i64 %0 to i32
%3 = tail call i32 @llvm.hexagon.C2.mux(i32 %conv3, i32 %1, i32 %2)
store i32 %3, i32* %lsr.iv3335, align 4
%conv8 = sext i8 %predicate_1.023 to i32
%4 = lshr i64 %val.021, 32
%5 = trunc i64 %4 to i32
%6 = lshr i64 %0, 32
%7 = trunc i64 %6 to i32
%8 = tail call i32 @llvm.hexagon.C2.mux(i32 %conv8, i32 %5, i32 %7)
store i32 %8, i32* %lsr.iv2931, align 4
%srcval = load i64, i64* %lsr.iv27, align 8
%9 = load i8, i8* %lsr.iv40, align 1
%10 = load i8, i8* %lsr.iv37, align 1
%lftr.wideiv = trunc i32 %lsr.iv42 to i8
%exitcond = icmp eq i8 %lftr.wideiv, 32
%scevgep26 = getelementptr %union.vect64, %union.vect64* %lsr.iv, i32 1
%scevgep30 = getelementptr %union.vect32, %union.vect32* %lsr.iv29, i32 1
%scevgep34 = getelementptr %union.vect32, %union.vect32* %lsr.iv33, i32 1
%scevgep38 = getelementptr i8, i8* %lsr.iv37, i32 1
%scevgep41 = getelementptr i8, i8* %lsr.iv40, i32 1
%lsr.iv.next = add i32 %lsr.iv42, 1
br i1 %exitcond, label %for.end, label %for.body
define void @f0(%s.0* nocapture %a0, %s.1* nocapture %a1, %s.1* nocapture %a2, i8* nocapture %a3, i8* nocapture %a4) #0 {
b0:
%v0 = getelementptr %s.0, %s.0* %a0, i32 1
%v1 = getelementptr %s.1, %s.1* %a2, i32 1
%v2 = getelementptr %s.1, %s.1* %a1, i32 1
%v3 = getelementptr i8, i8* %a4, i32 1
%v4 = getelementptr i8, i8* %a3, i32 1
br label %b1
for.end: ; preds = %for.body
b1: ; preds = %b1, %b0
%v5 = phi i32 [ %v38, %b1 ], [ 2, %b0 ]
%v6 = phi i8* [ %v37, %b1 ], [ %v4, %b0 ]
%v7 = phi i8* [ %v36, %b1 ], [ %v3, %b0 ]
%v8 = phi %s.1* [ %v35, %b1 ], [ %v2, %b0 ]
%v9 = phi %s.1* [ %v34, %b1 ], [ %v1, %b0 ]
%v10 = phi %s.0* [ %v33, %b1 ], [ %v0, %b0 ]
%v11 = phi i8 [ undef, %b0 ], [ %v30, %b1 ]
%v12 = phi i8 [ undef, %b0 ], [ %v29, %b1 ]
%v13 = phi i64 [ undef, %b0 ], [ %v28, %b1 ]
%v14 = bitcast %s.1* %v8 to i32*
%v15 = bitcast %s.1* %v9 to i32*
%v16 = bitcast %s.0* %v10 to i64*
%v17 = tail call i64 @llvm.hexagon.A2.vsubhs(i64 0, i64 %v13)
%v18 = sext i8 %v12 to i32
%v19 = trunc i64 %v13 to i32
%v20 = trunc i64 %v17 to i32
%v21 = tail call i32 @llvm.hexagon.C2.mux(i32 %v18, i32 %v19, i32 %v20)
store i32 %v21, i32* %v14, align 4
%v22 = sext i8 %v11 to i32
%v23 = lshr i64 %v13, 32
%v24 = trunc i64 %v23 to i32
%v25 = lshr i64 %v17, 32
%v26 = trunc i64 %v25 to i32
%v27 = tail call i32 @llvm.hexagon.C2.mux(i32 %v22, i32 %v24, i32 %v26)
store i32 %v27, i32* %v15, align 4
%v28 = load i64, i64* %v16, align 8
%v29 = load i8, i8* %v6, align 1
%v30 = load i8, i8* %v7, align 1
%v31 = trunc i32 %v5 to i8
%v32 = icmp eq i8 %v31, 32
%v33 = getelementptr %s.0, %s.0* %v10, i32 1
%v34 = getelementptr %s.1, %s.1* %v9, i32 1
%v35 = getelementptr %s.1, %s.1* %v8, i32 1
%v36 = getelementptr i8, i8* %v7, i32 1
%v37 = getelementptr i8, i8* %v6, i32 1
%v38 = add i32 %v5, 1
br i1 %v32, label %b2, label %b1
b2: ; preds = %b1
ret void
}
declare i64 @llvm.hexagon.A2.vsubhs(i64, i64) nounwind readnone
declare i64 @llvm.hexagon.A2.vsubhs(i64, i64) #1
declare i32 @llvm.hexagon.C2.mux(i32, i32, i32) #1
declare i32 @llvm.hexagon.C2.mux(i32, i32, i32) nounwind readnone
attributes #0 = { nounwind "target-cpu"="hexagonv5" }
attributes #1 = { nounwind readnone "target-cpu"="hexagonv5" }

View File

@ -1,14 +1,16 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; CHECK: foo_empty
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: f0
; CHECK-NOT: allocframe
; CHECK-NOT: memd(r29
; CHECK: jump bar_empty
; CHECK: jump f1
define void @foo_empty(i32 %h) nounwind {
entry:
%add = add nsw i32 %h, 3
%call = tail call i32 bitcast (i32 (...)* @bar_empty to i32 (i32)*)(i32 %add) nounwind
define void @f0(i32 %a0) #0 {
b0:
%v0 = add nsw i32 %a0, 3
%v1 = tail call i32 bitcast (i32 (...)* @f1 to i32 (i32)*)(i32 %v0) #0
ret void
}
declare i32 @bar_empty(...)
declare i32 @f1(...) #0
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,19 +1,21 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; CHECK: word
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: f0
; CHECK-NOT: combine(#0
; CHECK: jump bar
; CHECK: jump f1
define void @word(i32* nocapture %a) nounwind {
entry:
%0 = load i32, i32* %a, align 4
%1 = zext i32 %0 to i64
%add.ptr = getelementptr inbounds i32, i32* %a, i32 1
%2 = load i32, i32* %add.ptr, align 4
%3 = zext i32 %2 to i64
%4 = shl nuw i64 %3, 32
%ins = or i64 %4, %1
tail call void @bar(i64 %ins) nounwind
define void @f0(i32* nocapture %a0) #0 {
b0:
%v0 = load i32, i32* %a0, align 4
%v1 = zext i32 %v0 to i64
%v2 = getelementptr inbounds i32, i32* %a0, i32 1
%v3 = load i32, i32* %v2, align 4
%v4 = zext i32 %v3 to i64
%v5 = shl nuw i64 %v4, 32
%v6 = or i64 %v5, %v1
tail call void @f1(i64 %v6) #0
ret void
}
declare void @bar(i64)
declare void @f1(i64) #0
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,16 +1,19 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: vaddh(r{{[0-9]+}},r{{[0-9]+}})
@j = external global i32
@k = external global i32
@g0 = external global i32
@g1 = external global i32
define void @foo() nounwind {
entry:
%0 = load i32, i32* @j, align 4
%1 = load i32, i32* @k, align 4
%2 = call i32 @llvm.hexagon.A2.svaddh(i32 %0, i32 %1)
store i32 %2, i32* @k, align 4
define void @f0() #0 {
b0:
%v0 = load i32, i32* @g0, align 4
%v1 = load i32, i32* @g1, align 4
%v2 = call i32 @llvm.hexagon.A2.svaddh(i32 %v0, i32 %v1)
store i32 %v2, i32* @g1, align 4
ret void
}
declare i32 @llvm.hexagon.A2.svaddh(i32, i32) nounwind readnone
declare i32 @llvm.hexagon.A2.svaddh(i32, i32) #1
attributes #0 = { nounwind "target-cpu"="hexagonv5" }
attributes #1 = { nounwind readnone "target-cpu"="hexagonv5" }

View File

@ -1,36 +1,38 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s -O0
; RUN: llc -march=hexagon -O0 < %s
; This is a regression test which makes sure that the offset check
; is available for STRiw_indexed instruction. This is required
; by 'Hexagon Expand Predicate Spill Code' pass.
define i32 @f(i32 %a, i32 %b) nounwind {
entry:
%retval = alloca i32, align 4
%a.addr = alloca i32, align 4
%b.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
store i32 %b, i32* %b.addr, align 4
%0 = load i32, i32* %a.addr, align 4
%1 = load i32, i32* %b.addr, align 4
%cmp = icmp sgt i32 %0, %1
br i1 %cmp, label %if.then, label %if.else
define i32 @f0(i32 %a0, i32 %a1) #0 {
b0:
%v0 = alloca i32, align 4
%v1 = alloca i32, align 4
%v2 = alloca i32, align 4
store i32 %a0, i32* %v1, align 4
store i32 %a1, i32* %v2, align 4
%v3 = load i32, i32* %v1, align 4
%v4 = load i32, i32* %v2, align 4
%v5 = icmp sgt i32 %v3, %v4
br i1 %v5, label %b1, label %b2
if.then:
%2 = load i32, i32* %a.addr, align 4
%3 = load i32, i32* %b.addr, align 4
%add = add nsw i32 %2, %3
store i32 %add, i32* %retval
br label %return
b1: ; preds = %b0
%v6 = load i32, i32* %v1, align 4
%v7 = load i32, i32* %v2, align 4
%v8 = add nsw i32 %v6, %v7
store i32 %v8, i32* %v0
br label %b3
if.else:
%4 = load i32, i32* %a.addr, align 4
%5 = load i32, i32* %b.addr, align 4
%sub = sub nsw i32 %4, %5
store i32 %sub, i32* %retval
br label %return
b2: ; preds = %b0
%v9 = load i32, i32* %v1, align 4
%v10 = load i32, i32* %v2, align 4
%v11 = sub nsw i32 %v9, %v10
store i32 %v11, i32* %v0
br label %b3
return:
%6 = load i32, i32* %retval
ret i32 %6
b3: ; preds = %b2, %b1
%v12 = load i32, i32* %v0
ret i32 %v12
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" }

View File

@ -1,10 +1,8 @@
# RUN: llvm-mc -arch=hexagon -mcpu=hexagonv4 --filetype=obj %s -o - | llvm-readobj -file-headers -elf-output-style=GNU | FileCheck --check-prefix=CHECK-V4 %s
# RUN: llvm-mc -arch=hexagon -mcpu=hexagonv5 --filetype=obj %s -o - | llvm-readobj -file-headers -elf-output-style=GNU | FileCheck --check-prefix=CHECK-V5 %s
# RUN: llvm-mc -arch=hexagon -mcpu=hexagonv55 --filetype=obj %s -o - | llvm-readobj -file-headers -elf-output-style=GNU | FileCheck --check-prefix=CHECK-V55 %s
# RUN: llvm-mc -arch=hexagon -mcpu=hexagonv60 --filetype=obj %s -o - | llvm-readobj -file-headers -elf-output-style=GNU | FileCheck --check-prefix=CHECK-V60 %s
# RUN: llvm-mc -arch=hexagon -mcpu=hexagonv62 --filetype=obj %s -o - | llvm-readobj -file-headers -elf-output-style=GNU | FileCheck --check-prefix=CHECK-V62 %s
# CHECK-V4: Flags: 0x3
# CHECK-V5: Flags: 0x4
# CHECK-V55: Flags: 0x5
# CHECK-V60: Flags: 0x60