[Hexagon] Change HVX vector predicate types from v512/1024i1 to v64/128i1

This commit removes the artificial types <512 x i1> and <1024 x i1>
from HVX intrinsics, and makes v512i1 and v1024i1 no longer legal on
Hexagon.

This change may cause existing bitcode files to become invalid.

* Converting between vector predicates and vector registers must be
  done explicitly, via the vandvrt/vandqrt instructions (i.e. their
  intrinsics). For example, in 64-byte mode:
    %Q = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %V, i32 -1)
    %V = call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %Q, i32 -1)

  The conversion intrinsics are:
    declare  <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)
    declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32)
    declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32)
    declare <32 x i32> @llvm.hexagon.V6.vandqrt.128B(<128 x i1>, i32)
  They are all pure.

* Vector predicate values cannot be loaded or stored directly; this
  reflects a restriction of the architecture. Loading and storing of
  vector predicates must be done indirectly, through vector registers
  and explicit conversions via the vandvrt/vandqrt instructions, as in
  the sketch below.
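
  A minimal sketch of spilling and reloading a predicate %Q in 64-byte
  mode (%addr is a hypothetical pointer to a vector-sized slot):
    %V  = call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %Q, i32 -1)
    store <16 x i32> %V, <16 x i32>* %addr
    ...
    %W  = load <16 x i32>, <16 x i32>* %addr
    %Q2 = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %W, i32 -1)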
Krzysztof Parzyszek 2020-02-07 09:33:18 -06:00
parent ba3f863dfb
commit b1d47467e2
68 changed files with 2103 additions and 1864 deletions


@@ -96,14 +96,14 @@ TARGET_BUILTIN(__builtin_HEXAGON_S2_storerd_pcr, "vv*iLLivC*", "", V5)
TARGET_BUILTIN(__builtin_HEXAGON_prefetch,"vv*","", V5)
TARGET_BUILTIN(__builtin_HEXAGON_A6_vminub_RdP,"LLiLLiLLi","", V62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstoreq,"vV16iv*V16i","", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorenq,"vV16iv*V16i","", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentq,"vV16iv*V16i","", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentnq,"vV16iv*V16i","", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstoreq_128B,"vV32iv*V32i","", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorenq_128B,"vV32iv*V32i","", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentq_128B,"vV32iv*V32i","", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentnq_128B,"vV32iv*V32i","", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstoreq,"vV64bv*V16i","", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorenq,"vV64bv*V16i","", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentq,"vV64bv*V16i","", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentnq,"vV64bv*V16i","", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstoreq_128B,"vV128bv*V32i","", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorenq_128B,"vV128bv*V32i","", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentq_128B,"vV128bv*V32i","", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaskedstorentnq_128B,"vV128bv*V32i","", HVXV60)
// These are only valid on v65


@@ -924,14 +924,14 @@ TARGET_BUILTIN(__builtin_HEXAGON_F2_dfmpyhh, "dddd", "", V67)
// V60 HVX Instructions.
TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_qpred_ai, "vV16iv*V16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_qpred_ai_128B, "vV32iv*V32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nqpred_ai, "vV16iv*V16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nqpred_ai_128B, "vV32iv*V32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai, "vV16iv*V16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai_128B, "vV32iv*V32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai, "vV16iv*V16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai_128B, "vV32iv*V32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_qpred_ai, "vV64bv*V16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_qpred_ai_128B, "vV128bv*V32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nqpred_ai, "vV64bv*V16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nqpred_ai_128B, "vV128bv*V32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai, "vV64bv*V16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai_128B, "vV128bv*V32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai, "vV64bv*V16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai_128B, "vV128bv*V32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_valignb, "V16iV16iV16ii", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_valignb_128B, "V32iV32iV32ii", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vlalignb, "V16iV16iV16ii", "", HVXV60)
@@ -1212,30 +1212,30 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubuhw, "V32iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubuhw_128B, "V64iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vd0, "V16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vd0_128B, "V32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbq, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbq_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbq, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbq_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbnq, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbnq_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbnq, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbnq_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhq, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhq_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhq, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhq_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhnq, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhnq_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhnq, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhnq_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwq, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwq_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwq, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwq_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwnq, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwnq_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwnq, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwnq_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbq, "V16iV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbq_128B, "V32iV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbq, "V16iV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbq_128B, "V32iV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbnq, "V16iV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddbnq_128B, "V32iV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbnq, "V16iV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubbnq_128B, "V32iV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhq, "V16iV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhq_128B, "V32iV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhq, "V16iV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhq_128B, "V32iV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhnq, "V16iV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddhnq_128B, "V32iV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhnq, "V16iV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubhnq_128B, "V32iV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwq, "V16iV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwq_128B, "V32iV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwq, "V16iV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwq_128B, "V32iV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwnq, "V16iV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddwnq_128B, "V32iV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwnq, "V16iV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsubwnq_128B, "V32iV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vabsh, "V16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vabsh_128B, "V32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vabsh_sat, "V16iV16i", "", HVXV60)
@@ -1346,104 +1346,104 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vxor, "V16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vxor_128B, "V32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vnot, "V16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vnot_128B, "V32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt, "V16iV16ii", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt_128B, "V32iV32ii", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt_acc, "V16iV16iV16ii", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt_acc_128B, "V32iV32iV32ii", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt, "V16iV16ii", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt_128B, "V32iV32ii", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt_acc, "V16iV16iV16ii", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt_acc_128B, "V32iV32iV32ii", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw, "V16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_128B, "V32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_and, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_and_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_or, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_or_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_xor, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw, "V16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_128B, "V32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_and, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_and_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_or, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_or_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_xor, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth, "V16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_128B, "V32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_and, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_and_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_or, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_or_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_xor, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh, "V16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_128B, "V32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_and, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_and_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_or, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_or_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_xor, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb, "V16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_128B, "V32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_and, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_and_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_or, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_or_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_xor, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb, "V16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_128B, "V32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_and, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_and_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_or, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_or_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_xor, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw, "V16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_128B, "V32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_and, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_and_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_or, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_or_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_xor, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh, "V16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_128B, "V32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_and, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_and_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_or, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_or_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_xor, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub, "V16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_128B, "V32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_and, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_and_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_or, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_or_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_xor, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_xor_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or, "V16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or_128B, "V32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and, "V16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and_128B, "V32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_not, "V16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_not_128B, "V32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_xor, "V16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_xor_128B, "V32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and_n, "V16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and_n_128B, "V32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or_n, "V16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or_n_128B, "V32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2, "V16ii", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2_128B, "V32ii", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmux, "V16iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmux_128B, "V32iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vswap, "V32iV16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vswap_128B, "V64iV32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt, "V16iV64bi", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt_128B, "V32iV128bi", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt_acc, "V16iV16iV64bi", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandqrt_acc_128B, "V32iV32iV128bi", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt, "V64bV16ii", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt_128B, "V128bV32ii", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt_acc, "V64bV64bV16ii", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvrt_acc_128B, "V128bV128bV32ii", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw, "V64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_128B, "V128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_and, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_and_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_or, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_or_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_xor, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtw_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw, "V64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_128B, "V128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_and, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_and_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_or, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_or_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_xor, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqw_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth, "V64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_128B, "V128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_and, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_and_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_or, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_or_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_xor, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgth_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh, "V64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_128B, "V128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_and, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_and_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_or, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_or_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_xor, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqh_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb, "V64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_128B, "V128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_and, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_and_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_or, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_or_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_xor, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtb_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb, "V64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_128B, "V128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_and, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_and_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_or, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_or_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_xor, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_veqb_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw, "V64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_128B, "V128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_and, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_and_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_or, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_or_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_xor, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuw_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh, "V64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_128B, "V128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_and, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_and_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_or, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_or_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_xor, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtuh_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub, "V64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_128B, "V128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_and, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_and_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_or, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_or_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_xor, "V64bV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtub_xor_128B, "V128bV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or, "V64bV64bV64b", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or_128B, "V128bV128bV128b", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and, "V64bV64bV64b", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and_128B, "V128bV128bV128b", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_not, "V64bV64b", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_not_128B, "V128bV128b", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_xor, "V64bV64bV64b", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_xor_128B, "V128bV128bV128b", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and_n, "V64bV64bV64b", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_and_n_128B, "V128bV128bV128b", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or_n, "V64bV64bV64b", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_or_n_128B, "V128bV128bV128b", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2, "V64bi", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2_128B, "V128bi", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmux, "V16iV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmux_128B, "V32iV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vswap, "V32iV64bV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vswap_128B, "V64iV128bV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaxub, "V16iV16iV16i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaxub_128B, "V32iV32iV32i", "", HVXV60)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vminub, "V16iV16iV16i", "", HVXV60)
@@ -1585,20 +1585,20 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpyiwub, "V16iV16ii", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpyiwub_128B, "V32iV32ii", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpyiwub_acc, "V16iV16iV16ii", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpyiwub_acc_128B, "V32iV32iV32ii", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt, "V16iV16ii", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt_128B, "V32iV32ii", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt_acc, "V16iV16iV16ii", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt_acc_128B, "V32iV32iV32ii", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvqv, "V16iV16iV16i", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvqv_128B, "V32iV32iV32i", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvnqv, "V16iV16iV16i", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvnqv_128B, "V32iV32iV32i", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2v2, "V16ii", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2v2_128B, "V32ii", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqw, "V16iV16iV16i", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqw_128B, "V32iV32iV32i", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqh, "V16iV16iV16i", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqh_128B, "V32iV32iV32i", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt, "V16iV64bi", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt_128B, "V32iV128bi", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt_acc, "V16iV16iV64bi", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandnqrt_acc_128B, "V32iV32iV128bi", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvqv, "V16iV64bV16i", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvqv_128B, "V32iV128bV32i", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvnqv, "V16iV64bV16i", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vandvnqv_128B, "V32iV128bV32i", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2v2, "V64bi", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_pred_scalar2v2_128B, "V128bi", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqw, "V64bV64bV64b", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqw_128B, "V128bV128bV128b", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqh, "V64bV64bV64b", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_shuffeqh_128B, "V128bV128bV128b", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaxb, "V16iV16iV16i", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vmaxb_128B, "V32iV32iV32i", "", HVXV62)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vminb, "V16iV16iV16i", "", HVXV62)
@@ -1678,12 +1678,12 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermh, "vv*iiV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermh_128B, "vv*iiV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhw, "vv*iiV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhw_128B, "vv*iiV64i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermwq, "vv*V16iiiV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermwq_128B, "vv*V32iiiV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhq, "vv*V16iiiV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhq_128B, "vv*V32iiiV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhwq, "vv*V16iiiV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhwq_128B, "vv*V32iiiV64i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermwq, "vv*V64biiV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermwq_128B, "vv*V128biiV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhq, "vv*V64biiV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhq_128B, "vv*V128biiV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhwq, "vv*V64biiV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vgathermhwq_128B, "vv*V128biiV64i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermw, "viiV16iV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermw_128B, "viiV32iV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermh, "viiV16iV16i", "", HVXV65)
@@ -1692,22 +1692,22 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermw_add, "viiV16iV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermw_add_128B, "viiV32iV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermh_add, "viiV16iV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermh_add_128B, "viiV32iV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermwq, "vV16iiiV16iV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermwq_128B, "vV32iiiV32iV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhq, "vV16iiiV16iV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhq_128B, "vV32iiiV32iV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermwq, "vV64biiV16iV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermwq_128B, "vV128biiV32iV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhq, "vV64biiV16iV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhq_128B, "vV128biiV32iV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhw, "viiV32iV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhw_128B, "viiV64iV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhwq, "vV16iiiV32iV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhwq_128B, "vV32iiiV64iV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhwq, "vV64biiV32iV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhwq_128B, "vV128biiV64iV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhw_add, "viiV32iV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vscattermhw_add_128B, "viiV64iV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqb, "V16iV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqb_128B, "V32iV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqh, "V16iV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqh_128B, "V32iV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqw, "V16iV16i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqw_128B, "V32iV32i", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqb, "V16iV64b", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqb_128B, "V32iV128b", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqh, "V16iV64b", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqh_128B, "V32iV128b", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqw, "V16iV64b", "", HVXV65)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vprefixqw_128B, "V32iV128b", "", HVXV65)
// V66 HVX Instructions.
@@ -1715,7 +1715,7 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vrotr, "V16iV16iV16i", "", HVXV66)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vrotr_128B, "V32iV32iV32i", "", HVXV66)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vasr_into, "V32iV32iV16iV16i", "", HVXV66)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vasr_into_128B, "V64iV64iV32iV32i", "", HVXV66)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddcarrysat, "V16iV16iV16iV16i", "", HVXV66)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddcarrysat_128B, "V32iV32iV32iV32i", "", HVXV66)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddcarrysat, "V16iV16iV16iV64b", "", HVXV66)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vaddcarrysat_128B, "V32iV32iV32iV128b", "", HVXV66)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsatdw, "V16iV16iV16i", "", HVXV66)
TARGET_BUILTIN(__builtin_HEXAGON_V6_vsatdw_128B, "V32iV32iV32i", "", HVXV66)


@@ -0,0 +1,206 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Automatically generated file, do not edit!
//===----------------------------------------------------------------------===//
CUSTOM_BUILTIN_MAPPING(M2_mpysmi, 0)
CUSTOM_BUILTIN_MAPPING(M2_dpmpyss_s0, 0)
CUSTOM_BUILTIN_MAPPING(M2_dpmpyuu_s0, 0)
CUSTOM_BUILTIN_MAPPING(M2_mpyi, 0)
CUSTOM_BUILTIN_MAPPING(M2_mpyui, 0)
CUSTOM_BUILTIN_MAPPING(A2_add, 0)
CUSTOM_BUILTIN_MAPPING(A2_sub, 0)
CUSTOM_BUILTIN_MAPPING(A2_addi, 0)
CUSTOM_BUILTIN_MAPPING(A2_addp, 0)
CUSTOM_BUILTIN_MAPPING(A2_subp, 0)
CUSTOM_BUILTIN_MAPPING(A2_neg, 0)
CUSTOM_BUILTIN_MAPPING(A2_zxtb, 0)
CUSTOM_BUILTIN_MAPPING(A2_sxtb, 0)
CUSTOM_BUILTIN_MAPPING(A2_zxth, 0)
CUSTOM_BUILTIN_MAPPING(A2_sxth, 0)
CUSTOM_BUILTIN_MAPPING(A2_and, 0)
CUSTOM_BUILTIN_MAPPING(A2_or, 0)
CUSTOM_BUILTIN_MAPPING(A2_xor, 0)
CUSTOM_BUILTIN_MAPPING(A2_not, 0)
CUSTOM_BUILTIN_MAPPING(A2_subri, 0)
CUSTOM_BUILTIN_MAPPING(A2_andir, 0)
CUSTOM_BUILTIN_MAPPING(A2_orir, 0)
CUSTOM_BUILTIN_MAPPING(S2_asr_i_r, 0)
CUSTOM_BUILTIN_MAPPING(S2_lsr_i_r, 0)
CUSTOM_BUILTIN_MAPPING(S2_asl_i_r, 0)
CUSTOM_BUILTIN_MAPPING(S2_asr_i_p, 0)
CUSTOM_BUILTIN_MAPPING(S2_lsr_i_p, 0)
CUSTOM_BUILTIN_MAPPING(S2_asl_i_p, 0)
CUSTOM_BUILTIN_MAPPING(V6_vS32b_qpred_ai, 64)
CUSTOM_BUILTIN_MAPPING(V6_vS32b_qpred_ai_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vS32b_nqpred_ai, 64)
CUSTOM_BUILTIN_MAPPING(V6_vS32b_nqpred_ai_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vS32b_nt_qpred_ai, 64)
CUSTOM_BUILTIN_MAPPING(V6_vS32b_nt_qpred_ai_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vS32b_nt_nqpred_ai, 64)
CUSTOM_BUILTIN_MAPPING(V6_vS32b_nt_nqpred_ai_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vaddbq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vaddbq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vsubbq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vsubbq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vaddbnq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vaddbnq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vsubbnq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vsubbnq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vaddhq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vaddhq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vsubhq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vsubhq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vaddhnq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vaddhnq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vsubhnq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vsubhnq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vaddwq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vaddwq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vsubwq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vsubwq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vaddwnq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vaddwnq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vsubwnq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vsubwnq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vandqrt, 64)
CUSTOM_BUILTIN_MAPPING(V6_vandqrt_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vandqrt_acc, 64)
CUSTOM_BUILTIN_MAPPING(V6_vandqrt_acc_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vandvrt, 64)
CUSTOM_BUILTIN_MAPPING(V6_vandvrt_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vandvrt_acc, 64)
CUSTOM_BUILTIN_MAPPING(V6_vandvrt_acc_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgtw, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgtw_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgtw_and, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgtw_and_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgtw_or, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgtw_or_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgtw_xor, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgtw_xor_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_veqw, 64)
CUSTOM_BUILTIN_MAPPING(V6_veqw_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_veqw_and, 64)
CUSTOM_BUILTIN_MAPPING(V6_veqw_and_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_veqw_or, 64)
CUSTOM_BUILTIN_MAPPING(V6_veqw_or_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_veqw_xor, 64)
CUSTOM_BUILTIN_MAPPING(V6_veqw_xor_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgth, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgth_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgth_and, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgth_and_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgth_or, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgth_or_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgth_xor, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgth_xor_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_veqh, 64)
CUSTOM_BUILTIN_MAPPING(V6_veqh_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_veqh_and, 64)
CUSTOM_BUILTIN_MAPPING(V6_veqh_and_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_veqh_or, 64)
CUSTOM_BUILTIN_MAPPING(V6_veqh_or_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_veqh_xor, 64)
CUSTOM_BUILTIN_MAPPING(V6_veqh_xor_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgtb, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgtb_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgtb_and, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgtb_and_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgtb_or, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgtb_or_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgtb_xor, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgtb_xor_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_veqb, 64)
CUSTOM_BUILTIN_MAPPING(V6_veqb_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_veqb_and, 64)
CUSTOM_BUILTIN_MAPPING(V6_veqb_and_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_veqb_or, 64)
CUSTOM_BUILTIN_MAPPING(V6_veqb_or_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_veqb_xor, 64)
CUSTOM_BUILTIN_MAPPING(V6_veqb_xor_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgtuw, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgtuw_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgtuw_and, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgtuw_and_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgtuw_or, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgtuw_or_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgtuw_xor, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgtuw_xor_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgtuh, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgtuh_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgtuh_and, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgtuh_and_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgtuh_or, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgtuh_or_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgtuh_xor, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgtuh_xor_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgtub, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgtub_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgtub_and, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgtub_and_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgtub_or, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgtub_or_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgtub_xor, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgtub_xor_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_pred_or, 64)
CUSTOM_BUILTIN_MAPPING(V6_pred_or_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_pred_and, 64)
CUSTOM_BUILTIN_MAPPING(V6_pred_and_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_pred_not, 64)
CUSTOM_BUILTIN_MAPPING(V6_pred_not_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_pred_xor, 64)
CUSTOM_BUILTIN_MAPPING(V6_pred_xor_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_pred_and_n, 64)
CUSTOM_BUILTIN_MAPPING(V6_pred_and_n_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_pred_or_n, 64)
CUSTOM_BUILTIN_MAPPING(V6_pred_or_n_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_pred_scalar2, 64)
CUSTOM_BUILTIN_MAPPING(V6_pred_scalar2_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vmux, 64)
CUSTOM_BUILTIN_MAPPING(V6_vmux_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vswap, 64)
CUSTOM_BUILTIN_MAPPING(V6_vswap_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vaddcarry, 64)
CUSTOM_BUILTIN_MAPPING(V6_vaddcarry_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vsubcarry, 64)
CUSTOM_BUILTIN_MAPPING(V6_vsubcarry_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vandnqrt, 64)
CUSTOM_BUILTIN_MAPPING(V6_vandnqrt_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vandnqrt_acc, 64)
CUSTOM_BUILTIN_MAPPING(V6_vandnqrt_acc_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vandvqv, 64)
CUSTOM_BUILTIN_MAPPING(V6_vandvqv_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vandvnqv, 64)
CUSTOM_BUILTIN_MAPPING(V6_vandvnqv_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_pred_scalar2v2, 64)
CUSTOM_BUILTIN_MAPPING(V6_pred_scalar2v2_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_shuffeqw, 64)
CUSTOM_BUILTIN_MAPPING(V6_shuffeqw_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_shuffeqh, 64)
CUSTOM_BUILTIN_MAPPING(V6_shuffeqh_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgathermwq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgathermwq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgathermhq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgathermhq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vgathermhwq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vgathermhwq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vscattermwq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vscattermwq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vscattermhq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vscattermhq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vscattermhwq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vscattermhwq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vprefixqb, 64)
CUSTOM_BUILTIN_MAPPING(V6_vprefixqb_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vprefixqh, 64)
CUSTOM_BUILTIN_MAPPING(V6_vprefixqh_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vprefixqw, 64)
CUSTOM_BUILTIN_MAPPING(V6_vprefixqw_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vaddcarrysat, 64)
CUSTOM_BUILTIN_MAPPING(V6_vaddcarrysat_128B, 128)


@@ -39,6 +39,7 @@ module Clang_Basic {
textual header "Basic/Builtins.def"
textual header "Basic/BuiltinsHexagon.def"
textual header "Basic/BuiltinsHexagonDep.def"
textual header "Basic/BuiltinsHexagonMapCustomDep.def"
textual header "Basic/BuiltinsLe64.def"
textual header "Basic/BuiltinsMips.def"
textual header "Basic/BuiltinsNEON.def"


@@ -57,6 +57,12 @@ public:
LargeArrayAlign = 64;
UseBitFieldTypeAlignment = true;
ZeroLengthBitfieldBoundary = 32;
// These are the default values anyway, but explicitly make sure
// that the size of the boolean type is 8 bits. Bool vectors are used
// for modeling predicate registers in HVX, and the bool -> byte
// correspondence matches the HVX architecture.
BoolWidth = BoolAlign = 8;
}
ArrayRef<Builtin::Info> getTargetBuiltins() const override;


@@ -15007,10 +15007,46 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
}
}
static std::pair<Intrinsic::ID, unsigned>
getIntrinsicForHexagonNonGCCBuiltin(unsigned BuiltinID) {
struct Info {
unsigned BuiltinID;
Intrinsic::ID IntrinsicID;
unsigned VecLen;
};
Info Infos[] = {
#define CUSTOM_BUILTIN_MAPPING(x,s) \
{ Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
#undef CUSTOM_BUILTIN_MAPPING
};
auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
(void)SortOnce;
const Info *F = std::lower_bound(std::begin(Infos), std::end(Infos),
Info{BuiltinID, 0, 0}, CmpInfo);
if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
return {Intrinsic::not_intrinsic, 0};
return {F->IntrinsicID, F->VecLen};
}
Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
SmallVector<llvm::Value *, 4> Ops;
Intrinsic::ID ID = Intrinsic::not_intrinsic;
Intrinsic::ID ID;
unsigned VecLen;
std::tie(ID, VecLen) = getIntrinsicForHexagonNonGCCBuiltin(BuiltinID);
auto MakeCircLd = [&](unsigned IntID, bool HasImm) {
// The base pointer is passed by address, so it needs to be loaded.
@@ -15099,51 +15135,41 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
return Builder.CreateExtractValue(Result, 1);
};
auto V2Q = [this, VecLen] (llvm::Value *Vec) {
Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
: Intrinsic::hexagon_V6_vandvrt;
return Builder.CreateCall(CGM.getIntrinsic(ID),
{Vec, Builder.getInt32(-1)});
};
auto Q2V = [this, VecLen] (llvm::Value *Pred) {
Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
: Intrinsic::hexagon_V6_vandqrt;
return Builder.CreateCall(CGM.getIntrinsic(ID),
{Pred, Builder.getInt32(-1)});
};
switch (BuiltinID) {
// These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
// and the corresponding C/C++ builtins use loads/stores to update
// the predicate.
case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: {
Address Dest = EmitPointerWithAlignment(E->getArg(2));
unsigned Size;
if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vaddcarry) {
Size = 512;
ID = Intrinsic::hexagon_V6_vaddcarry;
} else {
Size = 1024;
ID = Intrinsic::hexagon_V6_vaddcarry_128B;
}
Dest = Builder.CreateBitCast(Dest,
llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
LoadInst *QLd = Builder.CreateLoad(Dest);
Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
Vprd->getType()->getPointerTo(0));
Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
return Builder.CreateExtractValue(Result, 0);
}
case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
Address Dest = EmitPointerWithAlignment(E->getArg(2));
unsigned Size;
if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vsubcarry) {
Size = 512;
ID = Intrinsic::hexagon_V6_vsubcarry;
} else {
Size = 1024;
ID = Intrinsic::hexagon_V6_vsubcarry_128B;
}
Dest = Builder.CreateBitCast(Dest,
llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
LoadInst *QLd = Builder.CreateLoad(Dest);
Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
Vprd->getType()->getPointerTo(0));
Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
// Get the type from the 0-th argument.
llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
Address PredAddr = Builder.CreateBitCast(
EmitPointerWithAlignment(E->getArg(2)), VecType->getPointerTo(0));
llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
{EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
PredAddr.getAlignment());
return Builder.CreateExtractValue(Result, 0);
}
case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pci, /*HasImm*/true);
case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
@@ -15200,8 +15226,38 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
case Hexagon::BI__builtin_brev_ldd:
return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
default:
break;
default: {
if (ID == Intrinsic::not_intrinsic)
return nullptr;
auto IsVectorPredTy = [] (llvm::Type *T) {
return T->isVectorTy() && T->getVectorElementType()->isIntegerTy(1);
};
llvm::Function *IntrFn = CGM.getIntrinsic(ID);
llvm::FunctionType *IntrTy = IntrFn->getFunctionType();
SmallVector<llvm::Value*,4> Ops;
for (unsigned i = 0, e = IntrTy->getNumParams(); i != e; ++i) {
llvm::Type *T = IntrTy->getParamType(i);
const Expr *A = E->getArg(i);
if (IsVectorPredTy(T)) {
// There will be an implicit cast to a boolean vector. Strip it.
if (auto *Cast = dyn_cast<ImplicitCastExpr>(A)) {
if (Cast->getCastKind() == CK_BitCast)
A = Cast->getSubExpr();
}
Ops.push_back(V2Q(EmitScalarExpr(A)));
} else {
Ops.push_back(EmitScalarExpr(A));
}
}
llvm::Value *Call = Builder.CreateCall(IntrFn, Ops);
if (IsVectorPredTy(IntrTy->getReturnType()))
Call = Q2V(Call);
return Call;
} // default
} // switch
return nullptr;


@@ -9,7 +9,7 @@ typedef long HEXAGON_Vect2048 __attribute__((__vector_size__(256)))
__attribute__((aligned(256)));
// CHECK-LABEL: @test1
// CHECK: call <32 x i32> @llvm.hexagon.V6.vaddcarrysat.128B(<32 x i32> %{{[0-9]+}}, <32 x i32> %{{[0-9]+}}, <1024 x i1> %{{[0-9]+}})
// CHECK: call <32 x i32> @llvm.hexagon.V6.vaddcarrysat.128B(<32 x i32> %{{[0-9]+}}, <32 x i32> %{{[0-9]+}}, <128 x i1> %{{[0-9]+}})
HEXAGON_Vect1024 test1(void *in, void *out) {
HEXAGON_Vect1024 v1, v2;
HEXAGON_Vect1024 *p;


@@ -33,7 +33,7 @@ typedef long HEXAGON_Vect1024 __attribute__((__vector_size__(128)))
__attribute__((aligned(128)));
// CHECK-LABEL: @test5
// CHECK: call <16 x i32> @llvm.hexagon.V6.vaddcarrysat(<16 x i32> %{{[0-9]+}}, <16 x i32> %{{[0-9]+}}, <512 x i1> %{{[0-9]+}})
// CHECK: call <16 x i32> @llvm.hexagon.V6.vaddcarrysat(<16 x i32> %{{[0-9]+}}, <16 x i32> %{{[0-9]+}}, <64 x i1> %{{[0-9]+}})
HEXAGON_Vect512 test5(void *in, void *out) {
HEXAGON_Vect512 v1, v2;
HEXAGON_Vect512 *p;


@@ -2,6 +2,7 @@
// RUN: %clang_cc1 -triple hexagon-unknown-elf -target-cpu hexagonv65 -target-feature +hvxv65 -target-feature +hvx-length128b -emit-llvm %s -o - | FileCheck %s
void test() {
int q128 __attribute__((__vector_size__(128)));
int v128 __attribute__((__vector_size__(128)));
int v256 __attribute__((__vector_size__(256)));
@@ -18,33 +19,33 @@ void test() {
// CHECK: @llvm.hexagon.V6.lvsplatw.128B
__builtin_HEXAGON_V6_lvsplatw_128B(0);
// CHECK: @llvm.hexagon.V6.pred.and.128B
__builtin_HEXAGON_V6_pred_and_128B(v128, v128);
__builtin_HEXAGON_V6_pred_and_128B(q128, q128);
// CHECK: @llvm.hexagon.V6.pred.and.n.128B
__builtin_HEXAGON_V6_pred_and_n_128B(v128, v128);
__builtin_HEXAGON_V6_pred_and_n_128B(q128, q128);
// CHECK: @llvm.hexagon.V6.pred.not.128B
__builtin_HEXAGON_V6_pred_not_128B(v128);
__builtin_HEXAGON_V6_pred_not_128B(q128);
// CHECK: @llvm.hexagon.V6.pred.or.128B
__builtin_HEXAGON_V6_pred_or_128B(v128, v128);
__builtin_HEXAGON_V6_pred_or_128B(q128, q128);
// CHECK: @llvm.hexagon.V6.pred.or.n.128B
__builtin_HEXAGON_V6_pred_or_n_128B(v128, v128);
__builtin_HEXAGON_V6_pred_or_n_128B(q128, q128);
// CHECK: @llvm.hexagon.V6.pred.scalar2.128B
__builtin_HEXAGON_V6_pred_scalar2_128B(0);
// CHECK: @llvm.hexagon.V6.pred.scalar2v2.128B
__builtin_HEXAGON_V6_pred_scalar2v2_128B(0);
// CHECK: @llvm.hexagon.V6.pred.xor.128B
__builtin_HEXAGON_V6_pred_xor_128B(v128, v128);
__builtin_HEXAGON_V6_pred_xor_128B(q128, q128);
// CHECK: @llvm.hexagon.V6.shuffeqh.128B
__builtin_HEXAGON_V6_shuffeqh_128B(v128, v128);
__builtin_HEXAGON_V6_shuffeqh_128B(q128, q128);
// CHECK: @llvm.hexagon.V6.shuffeqw.128B
__builtin_HEXAGON_V6_shuffeqw_128B(v128, v128);
__builtin_HEXAGON_V6_shuffeqw_128B(q128, q128);
// CHECK: @llvm.hexagon.V6.vS32b.nqpred.ai.128B
__builtin_HEXAGON_V6_vS32b_nqpred_ai_128B(v128, 0, v128);
__builtin_HEXAGON_V6_vS32b_nqpred_ai_128B(q128, 0, v128);
// CHECK: @llvm.hexagon.V6.vS32b.nt.nqpred.ai.128B
__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai_128B(v128, 0, v128);
__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai_128B(q128, 0, v128);
// CHECK: @llvm.hexagon.V6.vS32b.nt.qpred.ai.128B
__builtin_HEXAGON_V6_vS32b_nt_qpred_ai_128B(v128, 0, v128);
__builtin_HEXAGON_V6_vS32b_nt_qpred_ai_128B(q128, 0, v128);
// CHECK: @llvm.hexagon.V6.vS32b.qpred.ai.128B
__builtin_HEXAGON_V6_vS32b_qpred_ai_128B(v128, 0, v128);
__builtin_HEXAGON_V6_vS32b_qpred_ai_128B(q128, 0, v128);
// CHECK: @llvm.hexagon.V6.vabsb.128B
__builtin_HEXAGON_V6_vabsb_128B(v128);
// CHECK: @llvm.hexagon.V6.vabsb.sat.128B
@@ -70,9 +71,9 @@ void test() {
// CHECK: @llvm.hexagon.V6.vaddb.dv.128B
__builtin_HEXAGON_V6_vaddb_dv_128B(v256, v256);
// CHECK: @llvm.hexagon.V6.vaddbnq.128B
__builtin_HEXAGON_V6_vaddbnq_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vaddbnq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vaddbq.128B
__builtin_HEXAGON_V6_vaddbq_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vaddbq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vaddbsat.128B
__builtin_HEXAGON_V6_vaddbsat_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vaddbsat.dv.128B
@@ -88,9 +89,9 @@ void test() {
// CHECK: @llvm.hexagon.V6.vaddh.dv.128B
__builtin_HEXAGON_V6_vaddh_dv_128B(v256, v256);
// CHECK: @llvm.hexagon.V6.vaddhnq.128B
__builtin_HEXAGON_V6_vaddhnq_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vaddhnq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vaddhq.128B
__builtin_HEXAGON_V6_vaddhq_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vaddhq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vaddhsat.128B
__builtin_HEXAGON_V6_vaddhsat_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vaddhsat.dv.128B
@@ -126,9 +127,9 @@ void test() {
// CHECK: @llvm.hexagon.V6.vaddw.dv.128B
__builtin_HEXAGON_V6_vaddw_dv_128B(v256, v256);
// CHECK: @llvm.hexagon.V6.vaddwnq.128B
__builtin_HEXAGON_V6_vaddwnq_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vaddwnq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vaddwq.128B
__builtin_HEXAGON_V6_vaddwq_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vaddwq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vaddwsat.128B
__builtin_HEXAGON_V6_vaddwsat_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vaddwsat.dv.128B
@@ -140,21 +141,21 @@ void test() {
// CHECK: @llvm.hexagon.V6.vand.128B
__builtin_HEXAGON_V6_vand_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vandnqrt.128B
__builtin_HEXAGON_V6_vandnqrt_128B(v128, 0);
__builtin_HEXAGON_V6_vandnqrt_128B(q128, 0);
// CHECK: @llvm.hexagon.V6.vandnqrt.acc.128B
__builtin_HEXAGON_V6_vandnqrt_acc_128B(v128, v128, 0);
__builtin_HEXAGON_V6_vandnqrt_acc_128B(v128, q128, 0);
// CHECK: @llvm.hexagon.V6.vandqrt.128B
__builtin_HEXAGON_V6_vandqrt_128B(v128, 0);
__builtin_HEXAGON_V6_vandqrt_128B(q128, 0);
// CHECK: @llvm.hexagon.V6.vandqrt.acc.128B
__builtin_HEXAGON_V6_vandqrt_acc_128B(v128, v128, 0);
__builtin_HEXAGON_V6_vandqrt_acc_128B(v128, q128, 0);
// CHECK: @llvm.hexagon.V6.vandvnqv.128B
__builtin_HEXAGON_V6_vandvnqv_128B(v128, v128);
__builtin_HEXAGON_V6_vandvnqv_128B(q128, v128);
// CHECK: @llvm.hexagon.V6.vandvqv.128B
__builtin_HEXAGON_V6_vandvqv_128B(v128, v128);
__builtin_HEXAGON_V6_vandvqv_128B(q128, v128);
// CHECK: @llvm.hexagon.V6.vandvrt.128B
__builtin_HEXAGON_V6_vandvrt_128B(v128, 0);
// CHECK: @llvm.hexagon.V6.vandvrt.acc.128B
__builtin_HEXAGON_V6_vandvrt_acc_128B(v128, v128, 0);
__builtin_HEXAGON_V6_vandvrt_acc_128B(q128, v128, 0);
// CHECK: @llvm.hexagon.V6.vaslh.128B
__builtin_HEXAGON_V6_vaslh_128B(v128, 0);
// CHECK: @llvm.hexagon.V6.vaslh.acc.128B
@@ -296,87 +297,87 @@ void test() {
// CHECK: @llvm.hexagon.V6.veqb.128B
__builtin_HEXAGON_V6_veqb_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.veqb.and.128B
__builtin_HEXAGON_V6_veqb_and_128B(v128, v128, v128);
__builtin_HEXAGON_V6_veqb_and_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.veqb.or.128B
__builtin_HEXAGON_V6_veqb_or_128B(v128, v128, v128);
__builtin_HEXAGON_V6_veqb_or_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.veqb.xor.128B
__builtin_HEXAGON_V6_veqb_xor_128B(v128, v128, v128);
__builtin_HEXAGON_V6_veqb_xor_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.veqh.128B
__builtin_HEXAGON_V6_veqh_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.veqh.and.128B
__builtin_HEXAGON_V6_veqh_and_128B(v128, v128, v128);
__builtin_HEXAGON_V6_veqh_and_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.veqh.or.128B
__builtin_HEXAGON_V6_veqh_or_128B(v128, v128, v128);
__builtin_HEXAGON_V6_veqh_or_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.veqh.xor.128B
__builtin_HEXAGON_V6_veqh_xor_128B(v128, v128, v128);
__builtin_HEXAGON_V6_veqh_xor_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.veqw.128B
__builtin_HEXAGON_V6_veqw_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.veqw.and.128B
__builtin_HEXAGON_V6_veqw_and_128B(v128, v128, v128);
__builtin_HEXAGON_V6_veqw_and_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.veqw.or.128B
__builtin_HEXAGON_V6_veqw_or_128B(v128, v128, v128);
__builtin_HEXAGON_V6_veqw_or_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.veqw.xor.128B
__builtin_HEXAGON_V6_veqw_xor_128B(v128, v128, v128);
__builtin_HEXAGON_V6_veqw_xor_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgathermh.128B
__builtin_HEXAGON_V6_vgathermh_128B(0, 0, 0, v128);
// CHECK: @llvm.hexagon.V6.vgathermhq.128B
__builtin_HEXAGON_V6_vgathermhq_128B(0, v128, 0, 0, v128);
__builtin_HEXAGON_V6_vgathermhq_128B(0, q128, 0, 0, v128);
// CHECK: @llvm.hexagon.V6.vgathermhw.128B
__builtin_HEXAGON_V6_vgathermhw_128B(0, 0, 0, v256);
// CHECK: @llvm.hexagon.V6.vgathermhwq.128B
__builtin_HEXAGON_V6_vgathermhwq_128B(0, v128, 0, 0, v256);
__builtin_HEXAGON_V6_vgathermhwq_128B(0, q128, 0, 0, v256);
// CHECK: @llvm.hexagon.V6.vgathermw.128B
__builtin_HEXAGON_V6_vgathermw_128B(0, 0, 0, v128);
// CHECK: @llvm.hexagon.V6.vgathermwq.128B
__builtin_HEXAGON_V6_vgathermwq_128B(0, v128, 0, 0, v128);
__builtin_HEXAGON_V6_vgathermwq_128B(0, q128, 0, 0, v128);
// CHECK: @llvm.hexagon.V6.vgtb.128B
__builtin_HEXAGON_V6_vgtb_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vgtb.and.128B
__builtin_HEXAGON_V6_vgtb_and_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vgtb_and_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtb.or.128B
__builtin_HEXAGON_V6_vgtb_or_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vgtb_or_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtb.xor.128B
__builtin_HEXAGON_V6_vgtb_xor_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vgtb_xor_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgth.128B
__builtin_HEXAGON_V6_vgth_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vgth.and.128B
__builtin_HEXAGON_V6_vgth_and_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vgth_and_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgth.or.128B
__builtin_HEXAGON_V6_vgth_or_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vgth_or_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgth.xor.128B
__builtin_HEXAGON_V6_vgth_xor_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vgth_xor_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtub.128B
__builtin_HEXAGON_V6_vgtub_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vgtub.and.128B
__builtin_HEXAGON_V6_vgtub_and_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vgtub_and_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtub.or.128B
__builtin_HEXAGON_V6_vgtub_or_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vgtub_or_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtub.xor.128B
__builtin_HEXAGON_V6_vgtub_xor_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vgtub_xor_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtuh.128B
__builtin_HEXAGON_V6_vgtuh_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vgtuh.and.128B
__builtin_HEXAGON_V6_vgtuh_and_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vgtuh_and_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtuh.or.128B
__builtin_HEXAGON_V6_vgtuh_or_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vgtuh_or_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtuh.xor.128B
__builtin_HEXAGON_V6_vgtuh_xor_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vgtuh_xor_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtuw.128B
__builtin_HEXAGON_V6_vgtuw_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vgtuw.and.128B
__builtin_HEXAGON_V6_vgtuw_and_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vgtuw_and_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtuw.or.128B
__builtin_HEXAGON_V6_vgtuw_or_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vgtuw_or_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtuw.xor.128B
__builtin_HEXAGON_V6_vgtuw_xor_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vgtuw_xor_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtw.128B
__builtin_HEXAGON_V6_vgtw_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vgtw.and.128B
__builtin_HEXAGON_V6_vgtw_and_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vgtw_and_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtw.or.128B
__builtin_HEXAGON_V6_vgtw_or_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vgtw_or_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vgtw.xor.128B
__builtin_HEXAGON_V6_vgtw_xor_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vgtw_xor_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vinsertwr.128B
__builtin_HEXAGON_V6_vinsertwr_128B(v128, 0);
// CHECK: @llvm.hexagon.V6.vlalignb.128B
@ -416,13 +417,13 @@ void test() {
// CHECK: @llvm.hexagon.V6.vlutvwhi.128B
__builtin_HEXAGON_V6_vlutvwhi_128B(v128, v128, 0);
// CHECK: @llvm.hexagon.V6.vmaskedstorenq.128B
__builtin_HEXAGON_V6_vmaskedstorenq_128B(v128, 0, v128);
__builtin_HEXAGON_V6_vmaskedstorenq_128B(q128, 0, v128);
// CHECK: @llvm.hexagon.V6.vmaskedstorentnq.128B
__builtin_HEXAGON_V6_vmaskedstorentnq_128B(v128, 0, v128);
__builtin_HEXAGON_V6_vmaskedstorentnq_128B(q128, 0, v128);
// CHECK: @llvm.hexagon.V6.vmaskedstorentq.128B
__builtin_HEXAGON_V6_vmaskedstorentq_128B(v128, 0, v128);
__builtin_HEXAGON_V6_vmaskedstorentq_128B(q128, 0, v128);
// CHECK: @llvm.hexagon.V6.vmaskedstoreq.128B
__builtin_HEXAGON_V6_vmaskedstoreq_128B(v128, 0, v128);
__builtin_HEXAGON_V6_vmaskedstoreq_128B(q128, 0, v128);
// CHECK: @llvm.hexagon.V6.vmaxb.128B
__builtin_HEXAGON_V6_vmaxb_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vmaxh.128B
@ -566,7 +567,7 @@ void test() {
// CHECK: @llvm.hexagon.V6.vmpyuhv.acc.128B
__builtin_HEXAGON_V6_vmpyuhv_acc_128B(v256, v128, v128);
// CHECK: @llvm.hexagon.V6.vmux.128B
__builtin_HEXAGON_V6_vmux_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vmux_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vnavgb.128B
__builtin_HEXAGON_V6_vnavgb_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vnavgh.128B
@ -602,11 +603,11 @@ void test() {
// CHECK: @llvm.hexagon.V6.vpopcounth.128B
__builtin_HEXAGON_V6_vpopcounth_128B(v128);
// CHECK: @llvm.hexagon.V6.vprefixqb.128B
__builtin_HEXAGON_V6_vprefixqb_128B(v128);
__builtin_HEXAGON_V6_vprefixqb_128B(q128);
// CHECK: @llvm.hexagon.V6.vprefixqh.128B
__builtin_HEXAGON_V6_vprefixqh_128B(v128);
__builtin_HEXAGON_V6_vprefixqh_128B(q128);
// CHECK: @llvm.hexagon.V6.vprefixqw.128B
__builtin_HEXAGON_V6_vprefixqw_128B(v128);
__builtin_HEXAGON_V6_vprefixqw_128B(q128);
// CHECK: @llvm.hexagon.V6.vrdelta.128B
__builtin_HEXAGON_V6_vrdelta_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vrmpybub.rtt.128B
@ -676,19 +677,19 @@ void test() {
// CHECK: @llvm.hexagon.V6.vscattermh.add.128B
__builtin_HEXAGON_V6_vscattermh_add_128B(0, 0, v128, v128);
// CHECK: @llvm.hexagon.V6.vscattermhq.128B
__builtin_HEXAGON_V6_vscattermhq_128B(v128, 0, 0, v128, v128);
__builtin_HEXAGON_V6_vscattermhq_128B(q128, 0, 0, v128, v128);
// CHECK: @llvm.hexagon.V6.vscattermhw.128B
__builtin_HEXAGON_V6_vscattermhw_128B(0, 0, v256, v128);
// CHECK: @llvm.hexagon.V6.vscattermhw.add.128B
__builtin_HEXAGON_V6_vscattermhw_add_128B(0, 0, v256, v128);
// CHECK: @llvm.hexagon.V6.vscattermhwq.128B
__builtin_HEXAGON_V6_vscattermhwq_128B(v128, 0, 0, v256, v128);
__builtin_HEXAGON_V6_vscattermhwq_128B(q128, 0, 0, v256, v128);
// CHECK: @llvm.hexagon.V6.vscattermw.128B
__builtin_HEXAGON_V6_vscattermw_128B(0, 0, v128, v128);
// CHECK: @llvm.hexagon.V6.vscattermw.add.128B
__builtin_HEXAGON_V6_vscattermw_add_128B(0, 0, v128, v128);
// CHECK: @llvm.hexagon.V6.vscattermwq.128B
__builtin_HEXAGON_V6_vscattermwq_128B(v128, 0, 0, v128, v128);
__builtin_HEXAGON_V6_vscattermwq_128B(q128, 0, 0, v128, v128);
// CHECK: @llvm.hexagon.V6.vsh.128B
__builtin_HEXAGON_V6_vsh_128B(v128);
// CHECK: @llvm.hexagon.V6.vshufeh.128B
@ -714,9 +715,9 @@ void test() {
// CHECK: @llvm.hexagon.V6.vsubb.dv.128B
__builtin_HEXAGON_V6_vsubb_dv_128B(v256, v256);
// CHECK: @llvm.hexagon.V6.vsubbnq.128B
__builtin_HEXAGON_V6_vsubbnq_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vsubbnq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vsubbq.128B
__builtin_HEXAGON_V6_vsubbq_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vsubbq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vsubbsat.128B
__builtin_HEXAGON_V6_vsubbsat_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vsubbsat.dv.128B
@ -728,9 +729,9 @@ void test() {
// CHECK: @llvm.hexagon.V6.vsubh.dv.128B
__builtin_HEXAGON_V6_vsubh_dv_128B(v256, v256);
// CHECK: @llvm.hexagon.V6.vsubhnq.128B
__builtin_HEXAGON_V6_vsubhnq_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vsubhnq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vsubhq.128B
__builtin_HEXAGON_V6_vsubhq_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vsubhq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vsubhsat.128B
__builtin_HEXAGON_V6_vsubhsat_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vsubhsat.dv.128B
@ -760,15 +761,15 @@ void test() {
// CHECK: @llvm.hexagon.V6.vsubw.dv.128B
__builtin_HEXAGON_V6_vsubw_dv_128B(v256, v256);
// CHECK: @llvm.hexagon.V6.vsubwnq.128B
__builtin_HEXAGON_V6_vsubwnq_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vsubwnq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vsubwq.128B
__builtin_HEXAGON_V6_vsubwq_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vsubwq_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vsubwsat.128B
__builtin_HEXAGON_V6_vsubwsat_128B(v128, v128);
// CHECK: @llvm.hexagon.V6.vsubwsat.dv.128B
__builtin_HEXAGON_V6_vsubwsat_dv_128B(v256, v256);
// CHECK: @llvm.hexagon.V6.vswap.128B
__builtin_HEXAGON_V6_vswap_128B(v128, v128, v128);
__builtin_HEXAGON_V6_vswap_128B(q128, v128, v128);
// CHECK: @llvm.hexagon.V6.vtmpyb.128B
__builtin_HEXAGON_V6_vtmpyb_128B(v256, 0);
// CHECK: @llvm.hexagon.V6.vtmpyb.acc.128B
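
At the IR level, the q128 arguments in the test above now carry real <128 x i1> predicate values. A minimal sketch of what such builtins correspond to in 128-byte mode, built only from intrinsic signatures that appear elsewhere in this commit (per-byte signed max via compare plus mux); the function name is made up for illustration:

  declare <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32>, <32 x i32>)
  declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>)

  define <32 x i32> @maxb_128B(<32 x i32> %a, <32 x i32> %b) {
    ; the compare produces a <128 x i1> predicate, one bit per byte lane
    %q = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %a, <32 x i32> %b)
    ; select %a where the predicate is set, %b elsewhere
    %m = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %q, <32 x i32> %a, <32 x i32> %b)
    ret <32 x i32> %m
  }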


@ -2,6 +2,7 @@
// RUN: %clang_cc1 -triple hexagon-unknown-elf -target-cpu hexagonv65 -target-feature +hvxv65 -target-feature +hvx-length64b -emit-llvm %s -o - | FileCheck %s
void test() {
int q64 __attribute__((__vector_size__(64)));
int v64 __attribute__((__vector_size__(64)));
int v128 __attribute__((__vector_size__(128)));
@ -18,33 +19,33 @@ void test() {
// CHECK: @llvm.hexagon.V6.lvsplatw
__builtin_HEXAGON_V6_lvsplatw(0);
// CHECK: @llvm.hexagon.V6.pred.and
__builtin_HEXAGON_V6_pred_and(v64, v64);
__builtin_HEXAGON_V6_pred_and(q64, q64);
// CHECK: @llvm.hexagon.V6.pred.and.n
__builtin_HEXAGON_V6_pred_and_n(v64, v64);
__builtin_HEXAGON_V6_pred_and_n(q64, q64);
// CHECK: @llvm.hexagon.V6.pred.not
__builtin_HEXAGON_V6_pred_not(v64);
__builtin_HEXAGON_V6_pred_not(q64);
// CHECK: @llvm.hexagon.V6.pred.or
__builtin_HEXAGON_V6_pred_or(v64, v64);
__builtin_HEXAGON_V6_pred_or(q64, q64);
// CHECK: @llvm.hexagon.V6.pred.or.n
__builtin_HEXAGON_V6_pred_or_n(v64, v64);
__builtin_HEXAGON_V6_pred_or_n(q64, q64);
// CHECK: @llvm.hexagon.V6.pred.scalar2
__builtin_HEXAGON_V6_pred_scalar2(0);
// CHECK: @llvm.hexagon.V6.pred.scalar2v2
__builtin_HEXAGON_V6_pred_scalar2v2(0);
// CHECK: @llvm.hexagon.V6.pred.xor
__builtin_HEXAGON_V6_pred_xor(v64, v64);
__builtin_HEXAGON_V6_pred_xor(q64, q64);
// CHECK: @llvm.hexagon.V6.shuffeqh
__builtin_HEXAGON_V6_shuffeqh(v64, v64);
__builtin_HEXAGON_V6_shuffeqh(q64, q64);
// CHECK: @llvm.hexagon.V6.shuffeqw
__builtin_HEXAGON_V6_shuffeqw(v64, v64);
__builtin_HEXAGON_V6_shuffeqw(q64, q64);
// CHECK: @llvm.hexagon.V6.vS32b.nqpred.ai
__builtin_HEXAGON_V6_vS32b_nqpred_ai(v64, 0, v64);
__builtin_HEXAGON_V6_vS32b_nqpred_ai(q64, 0, v64);
// CHECK: @llvm.hexagon.V6.vS32b.nt.nqpred.ai
__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai(v64, 0, v64);
__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai(q64, 0, v64);
// CHECK: @llvm.hexagon.V6.vS32b.nt.qpred.ai
__builtin_HEXAGON_V6_vS32b_nt_qpred_ai(v64, 0, v64);
__builtin_HEXAGON_V6_vS32b_nt_qpred_ai(q64, 0, v64);
// CHECK: @llvm.hexagon.V6.vS32b.qpred.ai
__builtin_HEXAGON_V6_vS32b_qpred_ai(v64, 0, v64);
__builtin_HEXAGON_V6_vS32b_qpred_ai(q64, 0, v64);
// CHECK: @llvm.hexagon.V6.vabsb
__builtin_HEXAGON_V6_vabsb(v64);
// CHECK: @llvm.hexagon.V6.vabsb.sat
@ -70,9 +71,9 @@ void test() {
// CHECK: @llvm.hexagon.V6.vaddb.dv
__builtin_HEXAGON_V6_vaddb_dv(v128, v128);
// CHECK: @llvm.hexagon.V6.vaddbnq
__builtin_HEXAGON_V6_vaddbnq(v64, v64, v64);
__builtin_HEXAGON_V6_vaddbnq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vaddbq
__builtin_HEXAGON_V6_vaddbq(v64, v64, v64);
__builtin_HEXAGON_V6_vaddbq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vaddbsat
__builtin_HEXAGON_V6_vaddbsat(v64, v64);
// CHECK: @llvm.hexagon.V6.vaddbsat.dv
@ -88,9 +89,9 @@ void test() {
// CHECK: @llvm.hexagon.V6.vaddh.dv
__builtin_HEXAGON_V6_vaddh_dv(v128, v128);
// CHECK: @llvm.hexagon.V6.vaddhnq
__builtin_HEXAGON_V6_vaddhnq(v64, v64, v64);
__builtin_HEXAGON_V6_vaddhnq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vaddhq
__builtin_HEXAGON_V6_vaddhq(v64, v64, v64);
__builtin_HEXAGON_V6_vaddhq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vaddhsat
__builtin_HEXAGON_V6_vaddhsat(v64, v64);
// CHECK: @llvm.hexagon.V6.vaddhsat.dv
@ -126,9 +127,9 @@ void test() {
// CHECK: @llvm.hexagon.V6.vaddw.dv
__builtin_HEXAGON_V6_vaddw_dv(v128, v128);
// CHECK: @llvm.hexagon.V6.vaddwnq
__builtin_HEXAGON_V6_vaddwnq(v64, v64, v64);
__builtin_HEXAGON_V6_vaddwnq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vaddwq
__builtin_HEXAGON_V6_vaddwq(v64, v64, v64);
__builtin_HEXAGON_V6_vaddwq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vaddwsat
__builtin_HEXAGON_V6_vaddwsat(v64, v64);
// CHECK: @llvm.hexagon.V6.vaddwsat.dv
@ -140,21 +141,21 @@ void test() {
// CHECK: @llvm.hexagon.V6.vand
__builtin_HEXAGON_V6_vand(v64, v64);
// CHECK: @llvm.hexagon.V6.vandnqrt
__builtin_HEXAGON_V6_vandnqrt(v64, 0);
__builtin_HEXAGON_V6_vandnqrt(q64, 0);
// CHECK: @llvm.hexagon.V6.vandnqrt.acc
__builtin_HEXAGON_V6_vandnqrt_acc(v64, v64, 0);
__builtin_HEXAGON_V6_vandnqrt_acc(v64, q64, 0);
// CHECK: @llvm.hexagon.V6.vandqrt
__builtin_HEXAGON_V6_vandqrt(v64, 0);
__builtin_HEXAGON_V6_vandqrt(q64, 0);
// CHECK: @llvm.hexagon.V6.vandqrt.acc
__builtin_HEXAGON_V6_vandqrt_acc(v64, v64, 0);
__builtin_HEXAGON_V6_vandqrt_acc(v64, q64, 0);
// CHECK: @llvm.hexagon.V6.vandvnqv
__builtin_HEXAGON_V6_vandvnqv(v64, v64);
__builtin_HEXAGON_V6_vandvnqv(q64, v64);
// CHECK: @llvm.hexagon.V6.vandvqv
__builtin_HEXAGON_V6_vandvqv(v64, v64);
__builtin_HEXAGON_V6_vandvqv(q64, v64);
// CHECK: @llvm.hexagon.V6.vandvrt
__builtin_HEXAGON_V6_vandvrt(v64, 0);
// CHECK: @llvm.hexagon.V6.vandvrt.acc
__builtin_HEXAGON_V6_vandvrt_acc(v64, v64, 0);
__builtin_HEXAGON_V6_vandvrt_acc(q64, v64, 0);
// CHECK: @llvm.hexagon.V6.vaslh
__builtin_HEXAGON_V6_vaslh(v64, 0);
// CHECK: @llvm.hexagon.V6.vaslh.acc
@ -296,87 +297,87 @@ void test() {
// CHECK: @llvm.hexagon.V6.veqb
__builtin_HEXAGON_V6_veqb(v64, v64);
// CHECK: @llvm.hexagon.V6.veqb.and
__builtin_HEXAGON_V6_veqb_and(v64, v64, v64);
__builtin_HEXAGON_V6_veqb_and(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.veqb.or
__builtin_HEXAGON_V6_veqb_or(v64, v64, v64);
__builtin_HEXAGON_V6_veqb_or(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.veqb.xor
__builtin_HEXAGON_V6_veqb_xor(v64, v64, v64);
__builtin_HEXAGON_V6_veqb_xor(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.veqh
__builtin_HEXAGON_V6_veqh(v64, v64);
// CHECK: @llvm.hexagon.V6.veqh.and
__builtin_HEXAGON_V6_veqh_and(v64, v64, v64);
__builtin_HEXAGON_V6_veqh_and(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.veqh.or
__builtin_HEXAGON_V6_veqh_or(v64, v64, v64);
__builtin_HEXAGON_V6_veqh_or(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.veqh.xor
__builtin_HEXAGON_V6_veqh_xor(v64, v64, v64);
__builtin_HEXAGON_V6_veqh_xor(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.veqw
__builtin_HEXAGON_V6_veqw(v64, v64);
// CHECK: @llvm.hexagon.V6.veqw.and
__builtin_HEXAGON_V6_veqw_and(v64, v64, v64);
__builtin_HEXAGON_V6_veqw_and(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.veqw.or
__builtin_HEXAGON_V6_veqw_or(v64, v64, v64);
__builtin_HEXAGON_V6_veqw_or(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.veqw.xor
__builtin_HEXAGON_V6_veqw_xor(v64, v64, v64);
__builtin_HEXAGON_V6_veqw_xor(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgathermh
__builtin_HEXAGON_V6_vgathermh(0, 0, 0, v64);
// CHECK: @llvm.hexagon.V6.vgathermhq
__builtin_HEXAGON_V6_vgathermhq(0, v64, 0, 0, v64);
__builtin_HEXAGON_V6_vgathermhq(0, q64, 0, 0, v64);
// CHECK: @llvm.hexagon.V6.vgathermhw
__builtin_HEXAGON_V6_vgathermhw(0, 0, 0, v128);
// CHECK: @llvm.hexagon.V6.vgathermhwq
__builtin_HEXAGON_V6_vgathermhwq(0, v64, 0, 0, v128);
__builtin_HEXAGON_V6_vgathermhwq(0, q64, 0, 0, v128);
// CHECK: @llvm.hexagon.V6.vgathermw
__builtin_HEXAGON_V6_vgathermw(0, 0, 0, v64);
// CHECK: @llvm.hexagon.V6.vgathermwq
__builtin_HEXAGON_V6_vgathermwq(0, v64, 0, 0, v64);
__builtin_HEXAGON_V6_vgathermwq(0, q64, 0, 0, v64);
// CHECK: @llvm.hexagon.V6.vgtb
__builtin_HEXAGON_V6_vgtb(v64, v64);
// CHECK: @llvm.hexagon.V6.vgtb.and
__builtin_HEXAGON_V6_vgtb_and(v64, v64, v64);
__builtin_HEXAGON_V6_vgtb_and(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtb.or
__builtin_HEXAGON_V6_vgtb_or(v64, v64, v64);
__builtin_HEXAGON_V6_vgtb_or(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtb.xor
__builtin_HEXAGON_V6_vgtb_xor(v64, v64, v64);
__builtin_HEXAGON_V6_vgtb_xor(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgth
__builtin_HEXAGON_V6_vgth(v64, v64);
// CHECK: @llvm.hexagon.V6.vgth.and
__builtin_HEXAGON_V6_vgth_and(v64, v64, v64);
__builtin_HEXAGON_V6_vgth_and(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgth.or
__builtin_HEXAGON_V6_vgth_or(v64, v64, v64);
__builtin_HEXAGON_V6_vgth_or(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgth.xor
__builtin_HEXAGON_V6_vgth_xor(v64, v64, v64);
__builtin_HEXAGON_V6_vgth_xor(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtub
__builtin_HEXAGON_V6_vgtub(v64, v64);
// CHECK: @llvm.hexagon.V6.vgtub.and
__builtin_HEXAGON_V6_vgtub_and(v64, v64, v64);
__builtin_HEXAGON_V6_vgtub_and(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtub.or
__builtin_HEXAGON_V6_vgtub_or(v64, v64, v64);
__builtin_HEXAGON_V6_vgtub_or(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtub.xor
__builtin_HEXAGON_V6_vgtub_xor(v64, v64, v64);
__builtin_HEXAGON_V6_vgtub_xor(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtuh
__builtin_HEXAGON_V6_vgtuh(v64, v64);
// CHECK: @llvm.hexagon.V6.vgtuh.and
__builtin_HEXAGON_V6_vgtuh_and(v64, v64, v64);
__builtin_HEXAGON_V6_vgtuh_and(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtuh.or
__builtin_HEXAGON_V6_vgtuh_or(v64, v64, v64);
__builtin_HEXAGON_V6_vgtuh_or(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtuh.xor
__builtin_HEXAGON_V6_vgtuh_xor(v64, v64, v64);
__builtin_HEXAGON_V6_vgtuh_xor(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtuw
__builtin_HEXAGON_V6_vgtuw(v64, v64);
// CHECK: @llvm.hexagon.V6.vgtuw.and
__builtin_HEXAGON_V6_vgtuw_and(v64, v64, v64);
__builtin_HEXAGON_V6_vgtuw_and(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtuw.or
__builtin_HEXAGON_V6_vgtuw_or(v64, v64, v64);
__builtin_HEXAGON_V6_vgtuw_or(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtuw.xor
__builtin_HEXAGON_V6_vgtuw_xor(v64, v64, v64);
__builtin_HEXAGON_V6_vgtuw_xor(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtw
__builtin_HEXAGON_V6_vgtw(v64, v64);
// CHECK: @llvm.hexagon.V6.vgtw.and
__builtin_HEXAGON_V6_vgtw_and(v64, v64, v64);
__builtin_HEXAGON_V6_vgtw_and(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtw.or
__builtin_HEXAGON_V6_vgtw_or(v64, v64, v64);
__builtin_HEXAGON_V6_vgtw_or(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vgtw.xor
__builtin_HEXAGON_V6_vgtw_xor(v64, v64, v64);
__builtin_HEXAGON_V6_vgtw_xor(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vinsertwr
__builtin_HEXAGON_V6_vinsertwr(v64, 0);
// CHECK: @llvm.hexagon.V6.vlalignb
@ -416,13 +417,13 @@ void test() {
// CHECK: @llvm.hexagon.V6.vlutvwhi
__builtin_HEXAGON_V6_vlutvwhi(v64, v64, 0);
// CHECK: @llvm.hexagon.V6.vmaskedstorenq
__builtin_HEXAGON_V6_vmaskedstorenq(v64, 0, v64);
__builtin_HEXAGON_V6_vmaskedstorenq(q64, 0, v64);
// CHECK: @llvm.hexagon.V6.vmaskedstorentnq
__builtin_HEXAGON_V6_vmaskedstorentnq(v64, 0, v64);
__builtin_HEXAGON_V6_vmaskedstorentnq(q64, 0, v64);
// CHECK: @llvm.hexagon.V6.vmaskedstorentq
__builtin_HEXAGON_V6_vmaskedstorentq(v64, 0, v64);
__builtin_HEXAGON_V6_vmaskedstorentq(q64, 0, v64);
// CHECK: @llvm.hexagon.V6.vmaskedstoreq
__builtin_HEXAGON_V6_vmaskedstoreq(v64, 0, v64);
__builtin_HEXAGON_V6_vmaskedstoreq(q64, 0, v64);
// CHECK: @llvm.hexagon.V6.vmaxb
__builtin_HEXAGON_V6_vmaxb(v64, v64);
// CHECK: @llvm.hexagon.V6.vmaxh
@ -566,7 +567,7 @@ void test() {
// CHECK: @llvm.hexagon.V6.vmpyuhv.acc
__builtin_HEXAGON_V6_vmpyuhv_acc(v128, v64, v64);
// CHECK: @llvm.hexagon.V6.vmux
__builtin_HEXAGON_V6_vmux(v64, v64, v64);
__builtin_HEXAGON_V6_vmux(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vnavgb
__builtin_HEXAGON_V6_vnavgb(v64, v64);
// CHECK: @llvm.hexagon.V6.vnavgh
@ -602,11 +603,11 @@ void test() {
// CHECK: @llvm.hexagon.V6.vpopcounth
__builtin_HEXAGON_V6_vpopcounth(v64);
// CHECK: @llvm.hexagon.V6.vprefixqb
__builtin_HEXAGON_V6_vprefixqb(v64);
__builtin_HEXAGON_V6_vprefixqb(q64);
// CHECK: @llvm.hexagon.V6.vprefixqh
__builtin_HEXAGON_V6_vprefixqh(v64);
__builtin_HEXAGON_V6_vprefixqh(q64);
// CHECK: @llvm.hexagon.V6.vprefixqw
__builtin_HEXAGON_V6_vprefixqw(v64);
__builtin_HEXAGON_V6_vprefixqw(q64);
// CHECK: @llvm.hexagon.V6.vrdelta
__builtin_HEXAGON_V6_vrdelta(v64, v64);
// CHECK: @llvm.hexagon.V6.vrmpybub.rtt
@ -676,19 +677,19 @@ void test() {
// CHECK: @llvm.hexagon.V6.vscattermh.add
__builtin_HEXAGON_V6_vscattermh_add(0, 0, v64, v64);
// CHECK: @llvm.hexagon.V6.vscattermhq
__builtin_HEXAGON_V6_vscattermhq(v64, 0, 0, v64, v64);
__builtin_HEXAGON_V6_vscattermhq(q64, 0, 0, v64, v64);
// CHECK: @llvm.hexagon.V6.vscattermhw
__builtin_HEXAGON_V6_vscattermhw(0, 0, v128, v64);
// CHECK: @llvm.hexagon.V6.vscattermhw.add
__builtin_HEXAGON_V6_vscattermhw_add(0, 0, v128, v64);
// CHECK: @llvm.hexagon.V6.vscattermhwq
__builtin_HEXAGON_V6_vscattermhwq(v64, 0, 0, v128, v64);
__builtin_HEXAGON_V6_vscattermhwq(q64, 0, 0, v128, v64);
// CHECK: @llvm.hexagon.V6.vscattermw
__builtin_HEXAGON_V6_vscattermw(0, 0, v64, v64);
// CHECK: @llvm.hexagon.V6.vscattermw.add
__builtin_HEXAGON_V6_vscattermw_add(0, 0, v64, v64);
// CHECK: @llvm.hexagon.V6.vscattermwq
__builtin_HEXAGON_V6_vscattermwq(v64, 0, 0, v64, v64);
__builtin_HEXAGON_V6_vscattermwq(q64, 0, 0, v64, v64);
// CHECK: @llvm.hexagon.V6.vsh
__builtin_HEXAGON_V6_vsh(v64);
// CHECK: @llvm.hexagon.V6.vshufeh
@ -714,9 +715,9 @@ void test() {
// CHECK: @llvm.hexagon.V6.vsubb.dv
__builtin_HEXAGON_V6_vsubb_dv(v128, v128);
// CHECK: @llvm.hexagon.V6.vsubbnq
__builtin_HEXAGON_V6_vsubbnq(v64, v64, v64);
__builtin_HEXAGON_V6_vsubbnq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vsubbq
__builtin_HEXAGON_V6_vsubbq(v64, v64, v64);
__builtin_HEXAGON_V6_vsubbq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vsubbsat
__builtin_HEXAGON_V6_vsubbsat(v64, v64);
// CHECK: @llvm.hexagon.V6.vsubbsat.dv
@ -728,9 +729,9 @@ void test() {
// CHECK: @llvm.hexagon.V6.vsubh.dv
__builtin_HEXAGON_V6_vsubh_dv(v128, v128);
// CHECK: @llvm.hexagon.V6.vsubhnq
__builtin_HEXAGON_V6_vsubhnq(v64, v64, v64);
__builtin_HEXAGON_V6_vsubhnq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vsubhq
__builtin_HEXAGON_V6_vsubhq(v64, v64, v64);
__builtin_HEXAGON_V6_vsubhq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vsubhsat
__builtin_HEXAGON_V6_vsubhsat(v64, v64);
// CHECK: @llvm.hexagon.V6.vsubhsat.dv
@ -760,15 +761,15 @@ void test() {
// CHECK: @llvm.hexagon.V6.vsubw.dv
__builtin_HEXAGON_V6_vsubw_dv(v128, v128);
// CHECK: @llvm.hexagon.V6.vsubwnq
__builtin_HEXAGON_V6_vsubwnq(v64, v64, v64);
__builtin_HEXAGON_V6_vsubwnq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vsubwq
__builtin_HEXAGON_V6_vsubwq(v64, v64, v64);
__builtin_HEXAGON_V6_vsubwq(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vsubwsat
__builtin_HEXAGON_V6_vsubwsat(v64, v64);
// CHECK: @llvm.hexagon.V6.vsubwsat.dv
__builtin_HEXAGON_V6_vsubwsat_dv(v128, v128);
// CHECK: @llvm.hexagon.V6.vswap
__builtin_HEXAGON_V6_vswap(v64, v64, v64);
__builtin_HEXAGON_V6_vswap(q64, v64, v64);
// CHECK: @llvm.hexagon.V6.vtmpyb
__builtin_HEXAGON_V6_vtmpyb(v128, 0);
// CHECK: @llvm.hexagon.V6.vtmpyb.acc
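
The 64-byte-mode builtins change the same way: a q64 argument corresponds to a <64 x i1> intrinsic operand. A small sketch assembled from declarations in the updated tests in this commit; the function name is hypothetical:

  declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32)
  declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>)

  define <16 x i32> @select_by_count(i32 %n, <16 x i32> %a, <16 x i32> %b) {
    ; materialize a vector predicate from a scalar register
    %q = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %n)
    %v = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %q, <16 x i32> %a, <16 x i32> %b)
    ret <16 x i32> %v
  }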


@ -258,44 +258,22 @@ Hexagon_v64i32_v64i32v32i32i64_rtt_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc_128B">;
// Masked vector stores
//
//
// Hexagon_vv64ivmemv512_Intrinsic<string GCCIntSuffix>
// tag: V6_vS32b_qpred_ai
class Hexagon_vv64ivmemv512_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
[], [llvm_v512i1_ty,llvm_ptr_ty,llvm_v16i32_ty],
[IntrArgMemOnly]>;
class Hexagon_custom_vms_Intrinsic
: Hexagon_NonGCC_Intrinsic<
[], [llvm_v64i1_ty,llvm_ptr_ty,llvm_v16i32_ty], [IntrWriteMem]>;
//
// Hexagon_vv128ivmemv1024_Intrinsic<string GCCIntSuffix>
// tag: V6_vS32b_qpred_ai_128B
class Hexagon_vv128ivmemv1024_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
[], [llvm_v1024i1_ty,llvm_ptr_ty,llvm_v32i32_ty],
[IntrArgMemOnly]>;
class Hexagon_custom_vms_Intrinsic_128B
: Hexagon_NonGCC_Intrinsic<
[], [llvm_v128i1_ty,llvm_ptr_ty,llvm_v32i32_ty], [IntrWriteMem]>;
def int_hexagon_V6_vmaskedstoreq :
Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstoreq">;
def int_hexagon_V6_vmaskedstoreq: Hexagon_custom_vms_Intrinsic;
def int_hexagon_V6_vmaskedstorenq: Hexagon_custom_vms_Intrinsic;
def int_hexagon_V6_vmaskedstorentq: Hexagon_custom_vms_Intrinsic;
def int_hexagon_V6_vmaskedstorentnq: Hexagon_custom_vms_Intrinsic;
def int_hexagon_V6_vmaskedstorenq :
Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorenq">;
def int_hexagon_V6_vmaskedstorentq :
Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorentq">;
def int_hexagon_V6_vmaskedstorentnq :
Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstorentnq">;
def int_hexagon_V6_vmaskedstoreq_128B :
Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstoreq_128B">;
def int_hexagon_V6_vmaskedstorenq_128B :
Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorenq_128B">;
def int_hexagon_V6_vmaskedstorentq_128B :
Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorentq_128B">;
def int_hexagon_V6_vmaskedstorentnq_128B :
Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vmaskedstorentnq_128B">;
def int_hexagon_V6_vmaskedstoreq_128B: Hexagon_custom_vms_Intrinsic_128B;
def int_hexagon_V6_vmaskedstorenq_128B: Hexagon_custom_vms_Intrinsic_128B;
def int_hexagon_V6_vmaskedstorentq_128B: Hexagon_custom_vms_Intrinsic_128B;
def int_hexagon_V6_vmaskedstorentnq_128B: Hexagon_custom_vms_Intrinsic_128B;
include "llvm/IR/IntrinsicsHexagonDep.td"

File diff suppressed because it is too large


@ -1199,7 +1199,7 @@ OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
ResultStack &Results) {
DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
MVT ByteTy = getSingleVT(MVT::i8);
MVT BoolTy = MVT::getVectorVT(MVT::i1, 8*HwLen); // XXX
MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
const SDLoc &dl(Results.InpNode);
SDValue B = getVectorConstant(Bytes, dl);
Results.push(Hexagon::V6_vd0, ByteTy, {});
@ -2203,28 +2203,28 @@ void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) {
case Intrinsic::hexagon_V6_vaddcarry: {
std::array<SDValue, 3> Ops = {
{N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v512i1);
SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
break;
}
case Intrinsic::hexagon_V6_vaddcarry_128B: {
std::array<SDValue, 3> Ops = {
{N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v1024i1);
SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
break;
}
case Intrinsic::hexagon_V6_vsubcarry: {
std::array<SDValue, 3> Ops = {
{N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v512i1);
SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
break;
}
case Intrinsic::hexagon_V6_vsubcarry_128B: {
std::array<SDValue, 3> Ops = {
{N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v1024i1);
SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
break;
}
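
The carry intrinsics keep their dual-result form; only the predicate half of the result type shrinks. A 128-byte-mode sketch matching the { <32 x i32>, <128 x i1> } signature exercised by the vaddcarry test at the end of this commit; the function name is hypothetical:

  declare { <32 x i32>, <128 x i1> } @llvm.hexagon.V6.vaddcarry.128B(<32 x i32>, <32 x i32>, <128 x i1>)

  define <32 x i32> @addc(<32 x i32> %a, <32 x i32> %b, <128 x i1> %cin) {
    %r = tail call { <32 x i32>, <128 x i1> } @llvm.hexagon.V6.vaddcarry.128B(<32 x i32> %a, <32 x i32> %b, <128 x i1> %cin)
    ; element 0 is the sum; element 1 is the carry-out predicate
    %s = extractvalue { <32 x i32>, <128 x i1> } %r, 0
    ret <32 x i32> %s
  }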


@ -1080,42 +1080,24 @@ HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
static Constant *convert_i1_to_i8(const Constant *ConstVal) {
SmallVector<Constant *, 128> NewConst;
const ConstantVector *CV = dyn_cast<ConstantVector>(ConstVal);
if (!CV)
return nullptr;
LLVMContext &Ctx = ConstVal->getContext();
IRBuilder<> IRB(Ctx);
unsigned NumVectorElements = CV->getNumOperands();
assert(isPowerOf2_32(NumVectorElements) &&
"conversion only supported for pow2 VectorSize!");
for (unsigned i = 0; i < NumVectorElements / 8; ++i) {
uint8_t x = 0;
for (unsigned j = 0; j < 8; ++j) {
uint8_t y = CV->getOperand(i * 8 + j)->getUniqueInteger().getZExtValue();
x |= y << (7 - j);
}
assert((x == 0 || x == 255) && "Either all 0's or all 1's expected!");
NewConst.push_back(IRB.getInt8(x));
}
return ConstantVector::get(NewConst);
}
SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
EVT ValTy = Op.getValueType();
ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
Constant *CVal = nullptr;
bool isVTi1Type = false;
if (const Constant *ConstVal = dyn_cast<Constant>(CPN->getConstVal())) {
Type *CValTy = ConstVal->getType();
if (CValTy->isVectorTy() &&
CValTy->getVectorElementType()->isIntegerTy(1)) {
CVal = convert_i1_to_i8(ConstVal);
isVTi1Type = (CVal != nullptr);
if (auto *CV = dyn_cast<ConstantVector>(CPN->getConstVal())) {
if (CV->getType()->getVectorElementType()->isIntegerTy(1)) {
IRBuilder<> IRB(CV->getContext());
SmallVector<Constant*, 128> NewConst;
unsigned VecLen = CV->getNumOperands();
assert(isPowerOf2_32(VecLen) &&
"conversion only supported for pow2 VectorSize");
for (unsigned i = 0; i < VecLen; ++i)
NewConst.push_back(IRB.getInt8(CV->getOperand(i)->isZeroValue()));
CVal = ConstantVector::get(NewConst);
isVTi1Type = true;
}
}
unsigned Align = CPN->getAlignment();
@ -3225,8 +3207,8 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(
switch (VT.getSizeInBits()) {
default:
return {0u, nullptr};
case 512:
case 1024:
case 64:
case 128:
return {0u, &Hexagon::HvxQRRegClass};
}
break;
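
With the 64- and 128-bit predicate sizes mapped to HvxQRRegClass, a vector predicate can be passed straight to inline assembly through the "q" constraint, with no bitcast through a vector type. A sketch modeled on the updated inline-asm test further down; the function name and operands are hypothetical:

  define void @masked_out(<128 x i1> %q, <32 x i32>* %p, <32 x i32> %v) {
    ; %q lands in a q register, %p in a scalar register, %v in an HVX register
    call void asm sideeffect "if ($0) vmem($1) = $2;", "q,r,v,~{memory}"(<128 x i1> %q, <32 x i32>* %p, <32 x i32> %v)
    ret void
  }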


@ -39,7 +39,6 @@ HexagonTargetLowering::initializeHVXLowering() {
addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v512i1, &Hexagon::HvxQRRegClass);
} else if (Subtarget.useHVX128BOps()) {
addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
@ -50,7 +49,6 @@ HexagonTargetLowering::initializeHVXLowering() {
addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v1024i1, &Hexagon::HvxQRRegClass);
}
// Set up operation actions.


@ -277,76 +277,6 @@ def : Pat <(v32i32 (int_hexagon_V6_hi_128B (v64i32 HvxWR:$src1))),
Requires<[UseHVX]>;
}
def : Pat <(v512i1 (bitconvert (v16i32 HvxVR:$src1))),
(v512i1 (V6_vandvrt (v16i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
Requires<[UseHVX]>;
def : Pat <(v512i1 (bitconvert (v32i16 HvxVR:$src1))),
(v512i1 (V6_vandvrt (v32i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
Requires<[UseHVX]>;
def : Pat <(v512i1 (bitconvert (v64i8 HvxVR:$src1))),
(v512i1 (V6_vandvrt (v64i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
Requires<[UseHVX]>;
def : Pat <(v16i32 (bitconvert (v512i1 HvxQR:$src1))),
(v16i32 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
Requires<[UseHVX]>;
def : Pat <(v32i16 (bitconvert (v512i1 HvxQR:$src1))),
(v32i16 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
Requires<[UseHVX]>;
def : Pat <(v64i8 (bitconvert (v512i1 HvxQR:$src1))),
(v64i8 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
Requires<[UseHVX]>;
def : Pat <(v1024i1 (bitconvert (v32i32 HvxVR:$src1))),
(v1024i1 (V6_vandvrt (v32i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
Requires<[UseHVX]>;
def : Pat <(v1024i1 (bitconvert (v64i16 HvxVR:$src1))),
(v1024i1 (V6_vandvrt (v64i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
Requires<[UseHVX]>;
def : Pat <(v1024i1 (bitconvert (v128i8 HvxVR:$src1))),
(v1024i1 (V6_vandvrt (v128i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
Requires<[UseHVX]>;
def : Pat <(v32i32 (bitconvert (v1024i1 HvxQR:$src1))),
(v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
Requires<[UseHVX]>;
def : Pat <(v64i16 (bitconvert (v1024i1 HvxQR:$src1))),
(v64i16 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
Requires<[UseHVX]>;
def : Pat <(v128i8 (bitconvert (v1024i1 HvxQR:$src1))),
(v128i8 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
Requires<[UseHVX]>;
let AddedComplexity = 140 in {
def : Pat <(store (v512i1 HvxQR:$src1), (i32 IntRegs:$addr)),
(V6_vS32b_ai IntRegs:$addr, 0,
(v16i32 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101))))>,
Requires<[UseHVX]>;
def : Pat <(v512i1 (load (i32 IntRegs:$addr))),
(v512i1 (V6_vandvrt
(v16i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>,
Requires<[UseHVX]>;
def : Pat <(store (v1024i1 HvxQR:$src1), (i32 IntRegs:$addr)),
(V6_vS32b_ai IntRegs:$addr, 0,
(v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101))))>,
Requires<[UseHVX]>;
def : Pat <(v1024i1 (load (i32 IntRegs:$addr))),
(v1024i1 (V6_vandvrt
(v32i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>,
Requires<[UseHVX]>;
}
def: Pat<(v64i16 (trunc v64i32:$Vdd)),
(v64i16 (V6_vpackwh_sat
(v32i32 (V6_hi HvxWR:$Vdd)),


@ -25,59 +25,59 @@ def : Pat < (v32i32 (int_hexagon_V6_hi_128B (v64i32 HvxWR:$src1))),
(v32i32 (EXTRACT_SUBREG (v64i32 HvxWR:$src1), vsub_hi)) >;
}
def : Pat <(v512i1 (bitconvert (v16i32 HvxVR:$src1))),
(v512i1 (V6_vandvrt(v16i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v64i1 (bitconvert (v16i32 HvxVR:$src1))),
(v64i1 (V6_vandvrt(v16i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v512i1 (bitconvert (v32i16 HvxVR:$src1))),
(v512i1 (V6_vandvrt(v32i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v64i1 (bitconvert (v32i16 HvxVR:$src1))),
(v64i1 (V6_vandvrt(v32i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v512i1 (bitconvert (v64i8 HvxVR:$src1))),
(v512i1 (V6_vandvrt(v64i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v64i1 (bitconvert (v64i8 HvxVR:$src1))),
(v64i1 (V6_vandvrt(v64i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v16i32 (bitconvert (v512i1 HvxQR:$src1))),
(v16i32 (V6_vandqrt(v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v16i32 (bitconvert (v64i1 HvxQR:$src1))),
(v16i32 (V6_vandqrt(v64i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v32i16 (bitconvert (v512i1 HvxQR:$src1))),
(v32i16 (V6_vandqrt(v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v32i16 (bitconvert (v64i1 HvxQR:$src1))),
(v32i16 (V6_vandqrt(v64i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v64i8 (bitconvert (v512i1 HvxQR:$src1))),
(v64i8 (V6_vandqrt(v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v64i8 (bitconvert (v64i1 HvxQR:$src1))),
(v64i8 (V6_vandqrt(v64i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v1024i1 (bitconvert (v32i32 HvxVR:$src1))),
(v1024i1 (V6_vandvrt (v32i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v128i1 (bitconvert (v32i32 HvxVR:$src1))),
(v128i1 (V6_vandvrt (v32i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v1024i1 (bitconvert (v64i16 HvxVR:$src1))),
(v1024i1 (V6_vandvrt (v64i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v128i1 (bitconvert (v64i16 HvxVR:$src1))),
(v128i1 (V6_vandvrt (v64i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v1024i1 (bitconvert (v128i8 HvxVR:$src1))),
(v1024i1 (V6_vandvrt (v128i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v128i1 (bitconvert (v128i8 HvxVR:$src1))),
(v128i1 (V6_vandvrt (v128i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v32i32 (bitconvert (v1024i1 HvxQR:$src1))),
(v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v32i32 (bitconvert (v128i1 HvxQR:$src1))),
(v32i32 (V6_vandqrt (v128i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v64i16 (bitconvert (v1024i1 HvxQR:$src1))),
(v64i16 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v64i16 (bitconvert (v128i1 HvxQR:$src1))),
(v64i16 (V6_vandqrt (v128i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v128i8 (bitconvert (v1024i1 HvxQR:$src1))),
(v128i8 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
def : Pat <(v128i8 (bitconvert (v128i1 HvxQR:$src1))),
(v128i8 (V6_vandqrt (v128i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
let AddedComplexity = 140 in {
def : Pat <(store (v512i1 HvxQR:$src1), (i32 IntRegs:$addr)),
def : Pat <(store (v64i1 HvxQR:$src1), (i32 IntRegs:$addr)),
(V6_vS32b_ai IntRegs:$addr, 0,
(v16i32 (V6_vandqrt (v512i1 HvxQR:$src1),
(v16i32 (V6_vandqrt (v64i1 HvxQR:$src1),
(A2_tfrsi 0x01010101))))>;
def : Pat <(v512i1 (load (i32 IntRegs:$addr))),
(v512i1 (V6_vandvrt
def : Pat <(v64i1 (load (i32 IntRegs:$addr))),
(v64i1 (V6_vandvrt
(v16i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>;
def : Pat <(store (v1024i1 HvxQR:$src1), (i32 IntRegs:$addr)),
def : Pat <(store (v128i1 HvxQR:$src1), (i32 IntRegs:$addr)),
(V6_vS32b_ai IntRegs:$addr, 0,
(v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1),
(v32i32 (V6_vandqrt (v128i1 HvxQR:$src1),
(A2_tfrsi 0x01010101))))>;
def : Pat <(v1024i1 (load (i32 IntRegs:$addr))),
(v1024i1 (V6_vandvrt
def : Pat <(v128i1 (load (i32 IntRegs:$addr))),
(v128i1 (V6_vandvrt
(v32i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>;
}
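
These patterns spell out the only route a predicate now has to memory: through a vector register. In IR the equivalent spill/reload is written with the conversion intrinsics, e.g. in 64-byte mode (a sketch; both declarations appear verbatim in tests in this commit, the function is hypothetical):

  declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32)
  declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)

  define <64 x i1> @spill_reload(<64 x i1> %q, <16 x i32>* %p) {
    ; expand the predicate into a vector register and store it, ...
    %v = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %q, i32 -1)
    store <16 x i32> %v, <16 x i32>* %p, align 64
    ; ... then reload the vector and convert back to a predicate
    %w = load <16 x i32>, <16 x i32>* %p, align 64
    %r = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %w, i32 -1)
    ret <64 x i1> %r
  }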


@ -319,7 +319,7 @@ let Namespace = "Hexagon" in {
// HVX types
def VecI1: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
[v512i1, v1024i1, v512i1]>;
[v64i1, v128i1, v64i1]>;
def VecI8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
[v64i8, v128i8, v64i8]>;
def VecI16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
@ -355,10 +355,10 @@ def HvxWR : RegisterClass<"Hexagon", [VecPI8, VecPI16, VecPI32], 1024,
[RegInfo<1024,1024,1024>, RegInfo<2048,2048,2048>, RegInfo<1024,1024,1024>]>;
}
def HvxQR : RegisterClass<"Hexagon", [VecI1, VecQ8, VecQ16, VecQ32], 512,
def HvxQR : RegisterClass<"Hexagon", [VecI1, VecQ8, VecQ16, VecQ32], 128,
(add Q0, Q1, Q2, Q3)> {
let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
[RegInfo<512,512,512>, RegInfo<1024,1024,1024>, RegInfo<512,512,512>]>;
[RegInfo<64,512,512>, RegInfo<128,1024,1024>, RegInfo<64,512,512>]>;
}
def HvxVQR : RegisterClass<"Hexagon", [untyped], 2048,


@ -286,9 +286,6 @@ public:
ArrayRef<MVT> ElemTypes = getHVXElementTypes();
if (IncludeBool && ElemTy == MVT::i1) {
// Special case for the v512i1, etc.
if (8*HwLen == NumElems)
return true;
// Boolean HVX vector types are formed from regular HVX vector types
// by replacing the element type with i1.
for (MVT T : ElemTypes)


@ -10,7 +10,7 @@ define <128 x i8> @t00(<128 x i8> %a0, <128 x i8> %a1) #0 {
ret <128 x i8> %v0
}
declare <1024 x i1> @llvm.hexagon.vandvrt.128B(<128 x i8>, i32)
declare <128 x i1> @llvm.hexagon.vandvrt.128B(<128 x i8>, i32)
; CHECK-LABEL: t01
; CHECK: vor(v{{[0-9:]+}},v{{[0-9:]+}})


@ -3,40 +3,34 @@
define inreg <16 x i32> @f0(i32 %a0, <16 x i32>* nocapture %a1) #0 {
b0:
%v0 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a0)
%v1 = tail call <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1> %v0)
%v0 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a0)
%v1 = tail call <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1> %v0)
%v2 = icmp ult i32 %a0, 48
br i1 %v2, label %b1, label %b2
b1: ; preds = %b0
%v3 = add nuw nsw i32 %a0, 16
%v4 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v3)
%v5 = tail call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %v4, <512 x i1> %v1)
%v4 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v3)
%v5 = tail call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %v4, <64 x i1> %v1)
br label %b2
b2: ; preds = %b1, %b0
%v6 = phi <512 x i1> [ %v5, %b1 ], [ %v1, %b0 ]
%v7 = bitcast <512 x i1> %v6 to <16 x i32>
%v6 = phi <64 x i1> [ %v5, %b1 ], [ %v1, %b0 ]
%v7 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v6, i32 -1)
%v8 = getelementptr inbounds <16 x i32>, <16 x i32>* %a1, i32 1
%v9 = load <16 x i32>, <16 x i32>* %v8, align 64
%v10 = getelementptr inbounds <16 x i32>, <16 x i32>* %a1, i32 2
%v11 = load <16 x i32>, <16 x i32>* %v10, align 64
%v12 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v6, <16 x i32> %v9, <16 x i32> %v11)
%v12 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v6, <16 x i32> %v9, <16 x i32> %v11)
store <16 x i32> %v12, <16 x i32>* %a1, align 64
ret <16 x i32> %v7
}
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
declare <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1>) #1
declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>) #1
declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
attributes #1 = { nounwind readnone }

File diff suppressed because one or more lines are too long


@ -14,22 +14,21 @@ target triple = "hexagon"
; CHECK: if (q{{[0-3]}}) vmem
define void @fred(i32 %a0) #0 {
b1:
%v2 = tail call <1024 x i1> @llvm.hexagon.V6.pred.scalar2.128B(i32 %a0) #2
%v2 = tail call <128 x i1> @llvm.hexagon.V6.pred.scalar2.128B(i32 %a0) #2
br i1 undef, label %b3, label %b5
b3: ; preds = %b1
%v4 = tail call <1024 x i1> @llvm.hexagon.V6.pred.not.128B(<1024 x i1> %v2) #2
%v4 = tail call <128 x i1> @llvm.hexagon.V6.pred.not.128B(<128 x i1> %v2) #2
br label %b5
b5: ; preds = %b3, %b1
%v6 = phi <1024 x i1> [ %v4, %b3 ], [ %v2, %b1 ]
%v7 = bitcast <1024 x i1> %v6 to <32 x i32>
tail call void asm sideeffect "if ($0) vmem($1) = $2;", "q,r,v,~{memory}"(<32 x i32> %v7, <32 x i32>* undef, <32 x i32> undef) #2
%v6 = phi <128 x i1> [ %v4, %b3 ], [ %v2, %b1 ]
tail call void asm sideeffect "if ($0) vmem($1) = $2;", "q,r,v,~{memory}"(<128 x i1> %v6, <32 x i32>* undef, <32 x i32> undef) #2
ret void
}
declare <1024 x i1> @llvm.hexagon.V6.pred.scalar2.128B(i32) #1
declare <1024 x i1> @llvm.hexagon.V6.pred.not.128B(<1024 x i1>) #1
declare <128 x i1> @llvm.hexagon.V6.pred.scalar2.128B(i32) #1
declare <128 x i1> @llvm.hexagon.V6.pred.not.128B(<128 x i1>) #1
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" }
attributes #1 = { nounwind readnone }


@ -47,61 +47,61 @@ for.body:
%18 = load <32 x i32>, <32 x i32>* %arrayidx22, align 128
%arrayidx23 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 14
%19 = load <32 x i32>, <32 x i32>* %arrayidx23, align 128
%20 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %2, <32 x i32> %11)
%21 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %11, <32 x i32> %2)
%22 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %2, <32 x i32> %11)
%23 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> undef, <32 x i32> %3)
%24 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %12, <32 x i32> undef)
%25 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %7, <32 x i32> %15)
%26 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %15, <32 x i32> %7)
%27 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %7, <32 x i32> %15)
%28 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %16, <32 x i32> %8)
%29 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %8, <32 x i32> %16)
%30 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %17, <32 x i32> %9)
%31 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %9, <32 x i32> %17)
%32 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %4, <32 x i32> %13)
%33 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %13, <32 x i32> %4)
%34 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %4, <32 x i32> %13)
%35 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> undef, <32 x i32> %5)
%36 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %5, <32 x i32> undef)
%37 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %14, <32 x i32> %6)
%38 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %6, <32 x i32> %14)
%39 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> undef)
%40 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> zeroinitializer)
%41 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %18, <32 x i32> %10)
%42 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %10, <32 x i32> %18)
%43 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %19, <32 x i32> undef)
%44 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> %19)
%45 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %21, <32 x i32> %26)
%46 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %26, <32 x i32> %21)
%47 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %21, <32 x i32> %26)
%48 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %28, <32 x i32> %23)
%49 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %23, <32 x i32> %28)
%50 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %30, <32 x i32> %24)
%51 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %24, <32 x i32> %30)
%52 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %22, <32 x i32> %27)
%53 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %27, <32 x i32> %22)
%54 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %22, <32 x i32> %27)
%55 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %29, <32 x i32> undef)
%56 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> undef, <32 x i32> %31)
%57 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %33, <32 x i32> %39)
%58 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %39, <32 x i32> %33)
%59 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %33, <32 x i32> %39)
%60 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %41, <32 x i32> %35)
%61 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %43, <32 x i32> %37)
%62 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %34, <32 x i32> %40)
%63 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %42, <32 x i32> %36)
%64 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %38, <32 x i32> %44)
%65 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %46, <32 x i32> %58)
%66 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %58, <32 x i32> %46)
%67 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %60, <32 x i32> %48)
%68 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %61, <32 x i32> %50)
%69 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %47, <32 x i32> %59)
%70 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %69, <32 x i32> %51, <32 x i32> zeroinitializer)
%71 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %53, <32 x i32> zeroinitializer)
%72 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %71, <32 x i32> %63, <32 x i32> %55)
%73 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %54, <32 x i32> undef)
%74 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %73, <32 x i32> %56, <32 x i32> %64)
%20 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %2, <32 x i32> %11)
%21 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %20, <32 x i32> %11, <32 x i32> %2)
%22 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %20, <32 x i32> %2, <32 x i32> %11)
%23 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %20, <32 x i32> undef, <32 x i32> %3)
%24 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %20, <32 x i32> %12, <32 x i32> undef)
%25 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %7, <32 x i32> %15)
%26 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %25, <32 x i32> %15, <32 x i32> %7)
%27 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %25, <32 x i32> %7, <32 x i32> %15)
%28 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %25, <32 x i32> %16, <32 x i32> %8)
%29 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %25, <32 x i32> %8, <32 x i32> %16)
%30 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %25, <32 x i32> %17, <32 x i32> %9)
%31 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %25, <32 x i32> %9, <32 x i32> %17)
%32 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %4, <32 x i32> %13)
%33 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %32, <32 x i32> %13, <32 x i32> %4)
%34 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %32, <32 x i32> %4, <32 x i32> %13)
%35 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %32, <32 x i32> undef, <32 x i32> %5)
%36 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %32, <32 x i32> %5, <32 x i32> undef)
%37 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %32, <32 x i32> %14, <32 x i32> %6)
%38 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %32, <32 x i32> %6, <32 x i32> %14)
%39 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> undef)
%40 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> zeroinitializer)
%41 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> zeroinitializer, <32 x i32> %18, <32 x i32> %10)
%42 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> zeroinitializer, <32 x i32> %10, <32 x i32> %18)
%43 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> zeroinitializer, <32 x i32> %19, <32 x i32> undef)
%44 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> %19)
%45 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %21, <32 x i32> %26)
%46 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %45, <32 x i32> %26, <32 x i32> %21)
%47 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %45, <32 x i32> %21, <32 x i32> %26)
%48 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %45, <32 x i32> %28, <32 x i32> %23)
%49 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %45, <32 x i32> %23, <32 x i32> %28)
%50 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %45, <32 x i32> %30, <32 x i32> %24)
%51 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %45, <32 x i32> %24, <32 x i32> %30)
%52 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %22, <32 x i32> %27)
%53 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %52, <32 x i32> %27, <32 x i32> %22)
%54 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %52, <32 x i32> %22, <32 x i32> %27)
%55 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %52, <32 x i32> %29, <32 x i32> undef)
%56 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %52, <32 x i32> undef, <32 x i32> %31)
%57 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %33, <32 x i32> %39)
%58 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %57, <32 x i32> %39, <32 x i32> %33)
%59 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %57, <32 x i32> %33, <32 x i32> %39)
%60 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %57, <32 x i32> %41, <32 x i32> %35)
%61 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %57, <32 x i32> %43, <32 x i32> %37)
%62 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %34, <32 x i32> %40)
%63 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %62, <32 x i32> %42, <32 x i32> %36)
%64 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %62, <32 x i32> %38, <32 x i32> %44)
%65 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %46, <32 x i32> %58)
%66 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %65, <32 x i32> %58, <32 x i32> %46)
%67 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %65, <32 x i32> %60, <32 x i32> %48)
%68 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %65, <32 x i32> %61, <32 x i32> %50)
%69 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %47, <32 x i32> %59)
%70 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %69, <32 x i32> %51, <32 x i32> zeroinitializer)
%71 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %53, <32 x i32> zeroinitializer)
%72 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %71, <32 x i32> %63, <32 x i32> %55)
%73 = tail call <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %54, <32 x i32> undef)
%74 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %73, <32 x i32> %56, <32 x i32> %64)
%75 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %68, <32 x i32> %67)
%76 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %70, <32 x i32> undef)
%77 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> zeroinitializer, <32 x i32> %72)
@ -129,9 +129,9 @@ for.end:
ret void
}
declare <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32>, <32 x i32>) #1
declare <128 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32>, <32 x i32>) #1


@ -7,51 +7,52 @@
define void @f0(<32 x i32> %a0, i8* %a1, <32 x i32> %a2) local_unnamed_addr {
b0:
%v0 = bitcast <32 x i32> %a0 to <1024 x i1>
tail call void @llvm.hexagon.V6.vS32b.qpred.ai.128B(<1024 x i1> %v0, i8* %a1, <32 x i32> %a2)
%v0 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a0, i32 -1)
tail call void @llvm.hexagon.V6.vS32b.qpred.ai.128B(<128 x i1> %v0, i8* %a1, <32 x i32> %a2)
ret void
}
; Function Attrs: argmemonly nounwind
declare void @llvm.hexagon.V6.vS32b.qpred.ai.128B(<1024 x i1>, i8*, <32 x i32>) #0
declare void @llvm.hexagon.V6.vS32b.qpred.ai.128B(<128 x i1>, i8*, <32 x i32>) #0
; CHECK-LABEL: f1:
; CHECK: if (!q{{[0-3]}}) vmem(r{{[0-9]+}}+#0) = v{{[0-9]+}}
define void @f1(<32 x i32> %a0, i8* %a1, <32 x i32> %a2) local_unnamed_addr {
b0:
%v0 = bitcast <32 x i32> %a0 to <1024 x i1>
tail call void @llvm.hexagon.V6.vS32b.nqpred.ai.128B(<1024 x i1> %v0, i8* %a1, <32 x i32> %a2)
%v0 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a0, i32 -1)
tail call void @llvm.hexagon.V6.vS32b.nqpred.ai.128B(<128 x i1> %v0, i8* %a1, <32 x i32> %a2)
ret void
}
; Function Attrs: argmemonly nounwind
declare void @llvm.hexagon.V6.vS32b.nqpred.ai.128B(<1024 x i1>, i8*, <32 x i32>) #0
declare void @llvm.hexagon.V6.vS32b.nqpred.ai.128B(<128 x i1>, i8*, <32 x i32>) #0
; CHECK-LABEL: f2:
; CHECK: if (q{{[0-3]}}) vmem(r{{[0-9]+}}+#0):nt = v{{[0-9]+}}
define void @f2(<32 x i32> %a0, i8* %a1, <32 x i32> %a2) local_unnamed_addr {
b0:
%v0 = bitcast <32 x i32> %a0 to <1024 x i1>
tail call void @llvm.hexagon.V6.vS32b.nt.qpred.ai.128B(<1024 x i1> %v0, i8* %a1, <32 x i32> %a2)
%v0 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a0, i32 -1)
tail call void @llvm.hexagon.V6.vS32b.nt.qpred.ai.128B(<128 x i1> %v0, i8* %a1, <32 x i32> %a2)
ret void
}
; Function Attrs: argmemonly nounwind
declare void @llvm.hexagon.V6.vS32b.nt.qpred.ai.128B(<1024 x i1>, i8*, <32 x i32>) #0
declare void @llvm.hexagon.V6.vS32b.nt.qpred.ai.128B(<128 x i1>, i8*, <32 x i32>) #0
; CHECK-LABEL: f3:
; CHECK: if (!q{{[0-3]}}) vmem(r{{[0-9]+}}+#0):nt = v{{[0-9]+}}
define void @f3(<32 x i32> %a0, i8* %a1, <32 x i32> %a2) local_unnamed_addr {
b0:
%v0 = bitcast <32 x i32> %a0 to <1024 x i1>
tail call void @llvm.hexagon.V6.vS32b.nt.nqpred.ai.128B(<1024 x i1> %v0, i8* %a1, <32 x i32> %a2)
%v0 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a0, i32 -1)
tail call void @llvm.hexagon.V6.vS32b.nt.nqpred.ai.128B(<128 x i1> %v0, i8* %a1, <32 x i32> %a2)
ret void
}
; Function Attrs: argmemonly nounwind
declare void @llvm.hexagon.V6.vS32b.nt.nqpred.ai.128B(<1024 x i1>, i8*, <32 x i32>) #0
declare void @llvm.hexagon.V6.vS32b.nt.nqpred.ai.128B(<128 x i1>, i8*, <32 x i32>) #0
declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32) #1
attributes #0 = { argmemonly nounwind }
attributes #1 = { nounwind readnone }

View File

@ -7,51 +7,52 @@
define void @f0(<16 x i32> %a0, i8* %a1, <16 x i32> %a2) local_unnamed_addr {
b0:
%v0 = bitcast <16 x i32> %a0 to <512 x i1>
tail call void @llvm.hexagon.V6.vS32b.qpred.ai(<512 x i1> %v0, i8* %a1, <16 x i32> %a2)
%v0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a0, i32 -1)
tail call void @llvm.hexagon.V6.vS32b.qpred.ai(<64 x i1> %v0, i8* %a1, <16 x i32> %a2)
ret void
}
; Function Attrs: argmemonly nounwind
declare void @llvm.hexagon.V6.vS32b.qpred.ai(<512 x i1>, i8*, <16 x i32>) #0
declare void @llvm.hexagon.V6.vS32b.qpred.ai(<64 x i1>, i8*, <16 x i32>) #0
; CHECK-LABEL: f1:
; CHECK: if (!q{{[0-3]}}) vmem(r{{[0-9]+}}+#0) = v{{[0-9]+}}
define void @f1(<16 x i32> %a0, i8* %a1, <16 x i32> %a2) local_unnamed_addr {
b0:
%v0 = bitcast <16 x i32> %a0 to <512 x i1>
tail call void @llvm.hexagon.V6.vS32b.nqpred.ai(<512 x i1> %v0, i8* %a1, <16 x i32> %a2)
%v0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a0, i32 -1)
tail call void @llvm.hexagon.V6.vS32b.nqpred.ai(<64 x i1> %v0, i8* %a1, <16 x i32> %a2)
ret void
}
; Function Attrs: argmemonly nounwind
declare void @llvm.hexagon.V6.vS32b.nqpred.ai(<512 x i1>, i8*, <16 x i32>) #0
declare void @llvm.hexagon.V6.vS32b.nqpred.ai(<64 x i1>, i8*, <16 x i32>) #0
; CHECK-LABEL: f2:
; CHECK: if (q{{[0-3]}}) vmem(r{{[0-9]+}}+#0):nt = v{{[0-9]+}}
define void @f2(<16 x i32> %a0, i8* %a1, <16 x i32> %a2) local_unnamed_addr {
b0:
%v0 = bitcast <16 x i32> %a0 to <512 x i1>
tail call void @llvm.hexagon.V6.vS32b.nt.qpred.ai(<512 x i1> %v0, i8* %a1, <16 x i32> %a2)
%v0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a0, i32 -1)
tail call void @llvm.hexagon.V6.vS32b.nt.qpred.ai(<64 x i1> %v0, i8* %a1, <16 x i32> %a2)
ret void
}
; Function Attrs: argmemonly nounwind
declare void @llvm.hexagon.V6.vS32b.nt.qpred.ai(<512 x i1>, i8*, <16 x i32>) #0
declare void @llvm.hexagon.V6.vS32b.nt.qpred.ai(<64 x i1>, i8*, <16 x i32>) #0
; CHECK-LABEL: f3:
; CHECK: if (!q{{[0-3]}}) vmem(r{{[0-9]+}}+#0):nt = v{{[0-9]+}}
define void @f3(<16 x i32> %a0, i8* %a1, <16 x i32> %a2) local_unnamed_addr {
b0:
%v0 = bitcast <16 x i32> %a0 to <512 x i1>
tail call void @llvm.hexagon.V6.vS32b.nt.nqpred.ai(<512 x i1> %v0, i8* %a1, <16 x i32> %a2)
%v0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a0, i32 -1)
tail call void @llvm.hexagon.V6.vS32b.nt.nqpred.ai(<64 x i1> %v0, i8* %a1, <16 x i32> %a2)
ret void
}
; Function Attrs: argmemonly nounwind
declare void @llvm.hexagon.V6.vS32b.nt.nqpred.ai(<512 x i1>, i8*, <16 x i32>) #0
declare void @llvm.hexagon.V6.vS32b.nt.nqpred.ai(<64 x i1>, i8*, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
attributes #0 = { argmemonly nounwind }
attributes #1 = { nounwind readnone }
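The tests above derive their predicates from vector arguments; the opposite direction follows the same rule, since a predicate can only reach memory through a vector register. A minimal sketch of that round trip in 64-byte mode (the function name @spill_q is hypothetical; both conversion intrinsics are declared with these signatures elsewhere in this commit):

; Hypothetical illustration: spill a predicate through a vector register
; and reload it, since predicates cannot be loaded or stored directly.
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32)
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)
define <64 x i1> @spill_q(<64 x i1> %q, <16 x i32>* %p) {
  %v = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %q, i32 -1)  ; Q -> V
  store <16 x i32> %v, <16 x i32>* %p, align 64
  %w = load <16 x i32>, <16 x i32>* %p, align 64
  %r = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %w, i32 -1)  ; V -> Q
  ret <64 x i1> %r
}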

View File

@ -6,29 +6,34 @@
; CHECK: v{{[0-9]+}}.w = vadd(v{{[0-9]+}}.w,v{{[0-9]+}}.w,q{{[0-3]}}):carry
define inreg <32 x i32> @f0(<32 x i32> %a0, <32 x i32> %a1, i8* nocapture readonly %a2) #0 {
b0:
%v0 = bitcast i8* %a2 to <1024 x i1>*
%v1 = load <1024 x i1>, <1024 x i1>* %v0, align 128
%v2 = tail call { <32 x i32>, <1024 x i1> } @llvm.hexagon.V6.vaddcarry.128B(<32 x i32> %a0, <32 x i32> %a1, <1024 x i1> %v1)
%v3 = extractvalue { <32 x i32>, <1024 x i1> } %v2, 0
ret <32 x i32> %v3
%v0 = bitcast i8* %a2 to <32 x i32>*
%v1 = load <32 x i32>, <32 x i32>* %v0, align 128
%v2 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %v1, i32 -1)
%v3 = tail call { <32 x i32>, <128 x i1> } @llvm.hexagon.V6.vaddcarry.128B(<32 x i32> %a0, <32 x i32> %a1, <128 x i1> %v2)
%v4 = extractvalue { <32 x i32>, <128 x i1> } %v3, 0
ret <32 x i32> %v4
}
; CHECK-LABEL: f1:
; CHECK: v{{[0-9]+}}.w = vsub(v{{[0-9]+}}.w,v{{[0-9]+}}.w,q{{[0-3]}}):carry
define inreg <32 x i32> @f1(<32 x i32> %a0, <32 x i32> %a1, i8* nocapture readonly %a2) #0 {
b0:
%v0 = bitcast i8* %a2 to <1024 x i1>*
%v1 = load <1024 x i1>, <1024 x i1>* %v0, align 128
%v2 = tail call { <32 x i32>, <1024 x i1> } @llvm.hexagon.V6.vsubcarry.128B(<32 x i32> %a0, <32 x i32> %a1, <1024 x i1> %v1)
%v3 = extractvalue { <32 x i32>, <1024 x i1> } %v2, 0
ret <32 x i32> %v3
%v0 = bitcast i8* %a2 to <32 x i32>*
%v1 = load <32 x i32>, <32 x i32>* %v0, align 128
%v2 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %v1, i32 -1)
%v3 = tail call { <32 x i32>, <128 x i1> } @llvm.hexagon.V6.vsubcarry.128B(<32 x i32> %a0, <32 x i32> %a1, <128 x i1> %v2)
%v4 = extractvalue { <32 x i32>, <128 x i1> } %v3, 0
ret <32 x i32> %v4
}
; Function Attrs: nounwind readnone
declare { <32 x i32>, <1024 x i1> } @llvm.hexagon.V6.vaddcarry.128B(<32 x i32>, <32 x i32>, <1024 x i1>) #1
declare { <32 x i32>, <128 x i1> } @llvm.hexagon.V6.vaddcarry.128B(<32 x i32>, <32 x i32>, <128 x i1>) #1
; Function Attrs: nounwind readnone
declare { <32 x i32>, <1024 x i1> } @llvm.hexagon.V6.vsubcarry.128B(<32 x i32>, <32 x i32>, <1024 x i1>) #1
declare { <32 x i32>, <128 x i1> } @llvm.hexagon.V6.vsubcarry.128B(<32 x i32>, <32 x i32>, <128 x i1>) #1
; Function Attrs: nounwind readnone
declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32) #1
attributes #0 = { nounwind "target-cpu"="hexagonv65" "target-features"="+hvxv65,+hvx-length128b" }
attributes #1 = { nounwind readnone }

View File

@ -6,29 +6,34 @@
; CHECK: v{{[0-9]+}}.w = vadd(v{{[0-9]+}}.w,v{{[0-9]+}}.w,q{{[0-3]}}):carry
define inreg <16 x i32> @f0(<16 x i32> %a0, <16 x i32> %a1, i8* nocapture readonly %a2) #0 {
b0:
%v0 = bitcast i8* %a2 to <512 x i1>*
%v1 = load <512 x i1>, <512 x i1>* %v0, align 64
%v2 = tail call { <16 x i32>, <512 x i1> } @llvm.hexagon.V6.vaddcarry(<16 x i32> %a0, <16 x i32> %a1, <512 x i1> %v1)
%v3 = extractvalue { <16 x i32>, <512 x i1> } %v2, 0
ret <16 x i32> %v3
%v0 = bitcast i8* %a2 to <16 x i32>*
%v1 = load <16 x i32>, <16 x i32>* %v0, align 64
%v2 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v1, i32 -1)
%v3 = tail call { <16 x i32>, <64 x i1> } @llvm.hexagon.V6.vaddcarry(<16 x i32> %a0, <16 x i32> %a1, <64 x i1> %v2)
%v4 = extractvalue { <16 x i32>, <64 x i1> } %v3, 0
ret <16 x i32> %v4
}
; CHECK-LABEL: f1:
; CHECK: v{{[0-9]+}}.w = vsub(v{{[0-9]+}}.w,v{{[0-9]+}}.w,q{{[0-3]}}):carry
define inreg <16 x i32> @f1(<16 x i32> %a0, <16 x i32> %a1, i8* nocapture readonly %a2) #0 {
b0:
%v0 = bitcast i8* %a2 to <512 x i1>*
%v1 = load <512 x i1>, <512 x i1>* %v0, align 64
%v2 = tail call { <16 x i32>, <512 x i1> } @llvm.hexagon.V6.vsubcarry(<16 x i32> %a0, <16 x i32> %a1, <512 x i1> %v1)
%v3 = extractvalue { <16 x i32>, <512 x i1> } %v2, 0
ret <16 x i32> %v3
%v0 = bitcast i8* %a2 to <16 x i32>*
%v1 = load <16 x i32>, <16 x i32>* %v0, align 64
%v2 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v1, i32 -1)
%v3 = tail call { <16 x i32>, <64 x i1> } @llvm.hexagon.V6.vsubcarry(<16 x i32> %a0, <16 x i32> %a1, <64 x i1> %v2)
%v4 = extractvalue { <16 x i32>, <64 x i1> } %v3, 0
ret <16 x i32> %v4
}
; Function Attrs: nounwind readnone
declare { <16 x i32>, <512 x i1> } @llvm.hexagon.V6.vaddcarry(<16 x i32>, <16 x i32>, <512 x i1>) #1
declare { <16 x i32>, <64 x i1> } @llvm.hexagon.V6.vaddcarry(<16 x i32>, <16 x i32>, <64 x i1>) #1
; Function Attrs: nounwind readnone
declare { <16 x i32>, <512 x i1> } @llvm.hexagon.V6.vsubcarry(<16 x i32>, <16 x i32>, <512 x i1>) #1
declare { <16 x i32>, <64 x i1> } @llvm.hexagon.V6.vsubcarry(<16 x i32>, <16 x i32>, <64 x i1>) #1
; Function Attrs: nounwind readnone
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
attributes #0 = { nounwind "target-cpu"="hexagonv65" "target-features"="+hvxv65,+hvx-length64b" }
attributes #1 = { nounwind readnone }
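The two-element return type is what makes carry chains compose: element 0 is the word sum and element 1 is the carry-out, which feeds the next vaddcarry directly as a <64 x i1>. A hedged sketch of a double-width add using the declaration above (the function name @addcarry_chain is hypothetical, not part of this test):

; Hypothetical illustration: add the low words, propagate the carry
; into the high words, and return the high result.
define <16 x i32> @addcarry_chain(<16 x i32> %a.lo, <16 x i32> %a.hi, <16 x i32> %b.lo, <16 x i32> %b.hi, <64 x i1> %cin) {
  %s0 = tail call { <16 x i32>, <64 x i1> } @llvm.hexagon.V6.vaddcarry(<16 x i32> %a.lo, <16 x i32> %b.lo, <64 x i1> %cin)
  %c0 = extractvalue { <16 x i32>, <64 x i1> } %s0, 1
  %s1 = tail call { <16 x i32>, <64 x i1> } @llvm.hexagon.V6.vaddcarry(<16 x i32> %a.hi, <16 x i32> %b.hi, <64 x i1> %c0)
  %hi = extractvalue { <16 x i32>, <64 x i1> } %s1, 0
  ret <16 x i32> %hi
}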

View File

@ -10,10 +10,13 @@ target triple = "hexagon"
; Function Attrs: nounwind
define void @foo(<16 x i32> %v0, <16 x i32> %v1, <16 x i32>* nocapture %p) #0 {
entry:
%0 = tail call <16 x i32> asm "$0 = vgtw($1.w,$2.w)", "=q,v,v"(<16 x i32> %v0, <16 x i32> %v1) #1
store <16 x i32> %0, <16 x i32>* %p, align 64
%0 = tail call <64 x i1> asm "$0 = vgtw($1.w,$2.w)", "=q,v,v"(<16 x i32> %v0, <16 x i32> %v1) #1
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1) #1
store <16 x i32> %1, <16 x i32>* %p, align 64
ret void
}
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
attributes #1 = { nounwind readnone }

View File

@ -8,7 +8,7 @@ target triple = "hexagon"
; CHECK-LABEL: fred
; CHECK: if (q{{[0-3]}}) vmem
define void @fred() #0 {
tail call void asm sideeffect "if ($0) vmem($1) = $2;", "q,r,v,~{memory}"(<32 x i32> undef, <32 x i32>* undef, <32 x i32> undef) #0
tail call void asm sideeffect "if ($0) vmem($1) = $2;", "q,r,v,~{memory}"(<128 x i1> undef, <32 x i32>* undef, <32 x i32> undef) #0
ret void
}

View File

@ -668,8 +668,8 @@ entry:
; CHECK: if (q{{[0-3]}}) v{{[0-9]+}}.b += v{{[0-9]+}}.b
define <16 x i32> @test84(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = tail call <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@ -677,8 +677,8 @@ entry:
; CHECK: if (q{{[0-3]}}) v{{[0-9]+}}.h += v{{[0-9]+}}.h
define <16 x i32> @test85(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = tail call <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vaddhq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@ -686,8 +686,8 @@ entry:
; CHECK: if (q{{[0-3]}}) v{{[0-9]+}}.w += v{{[0-9]+}}.w
define <16 x i32> @test86(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = tail call <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vaddwq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@ -695,8 +695,8 @@ entry:
; CHECK: if (!q{{[0-3]}}) v{{[0-9]+}}.b += v{{[0-9]+}}.b
define <16 x i32> @test87(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@ -704,8 +704,8 @@ entry:
; CHECK: if (!q{{[0-3]}}) v{{[0-9]+}}.h += v{{[0-9]+}}.h
define <16 x i32> @test88(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = tail call <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vaddhnq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@ -713,8 +713,8 @@ entry:
; CHECK: if (!q{{[0-3]}}) v{{[0-9]+}}.w += v{{[0-9]+}}.w
define <16 x i32> @test89(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@ -722,8 +722,8 @@ entry:
; CHECK: if (q{{[0-3]}}) v{{[0-9]+}}.b -= v{{[0-9]+}}.b
define <16 x i32> @test90(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = tail call <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vsubbq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@ -731,8 +731,8 @@ entry:
; CHECK: if (q{{[0-3]}}) v{{[0-9]+}}.h -= v{{[0-9]+}}.h
define <16 x i32> @test91(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = tail call <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vsubhq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@ -740,8 +740,8 @@ entry:
; CHECK: if (q{{[0-3]}}) v{{[0-9]+}}.w -= v{{[0-9]+}}.w
define <16 x i32> @test92(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = tail call <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vsubwq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@ -749,8 +749,8 @@ entry:
; CHECK: if (!q{{[0-3]}}) v{{[0-9]+}}.b -= v{{[0-9]+}}.b
define <16 x i32> @test93(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = tail call <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vsubbnq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@ -758,8 +758,8 @@ entry:
; CHECK: if (!q{{[0-3]}}) v{{[0-9]+}}.h -= v{{[0-9]+}}.h
define <16 x i32> @test94(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = tail call <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@ -767,8 +767,8 @@ entry:
; CHECK: if (!q{{[0-3]}}) v{{[0-9]+}}.w -= v{{[0-9]+}}.w
define <16 x i32> @test95(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> %0, <16 x i32> %c, <16 x i32> %b)
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1> %0, <16 x i32> %c, <16 x i32> %b)
ret <16 x i32> %1
}
@ -999,18 +999,18 @@ declare <16 x i32> @llvm.hexagon.V6.vxor(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddubsat(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddhq(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddwq(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vsubbq(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vsubhq(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vsubwq(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vabsh(<16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vabsh.sat(<16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vabsw(<16 x i32>) #0
@ -1029,6 +1029,7 @@ declare <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32>) #0
declare <32 x i32> @llvm.hexagon.V6.vsb(<16 x i32>) #0
declare <32 x i32> @llvm.hexagon.V6.vsh(<16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vassign(<16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #0
attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }

View File

@ -181,8 +181,8 @@ entry:
; CHECK: v{{[0-9]+}} = vmux(q{{[0-3]+}},v{{[0-9]+}},v{{[0-9]+}})
define void @test20(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %0, <16 x i32> %b, <16 x i32> %c)
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %0, <16 x i32> %b, <16 x i32> %c)
store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}
@ -191,10 +191,11 @@ entry:
; CHECK: q{{[0-3]+}} = and(q{{[0-3]+}},q{{[0-3]+}})
define void @test21(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = bitcast <16 x i32> %b to <512 x i1>
%2 = tail call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %0, <512 x i1> %1)
store <512 x i1> %2, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
%2 = tail call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %0, <64 x i1> %1)
%3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %2, i32 -1)
store <16 x i32> %3, <16 x i32>* @h, align 64
ret void
}
@ -202,10 +203,11 @@ entry:
; CHECK: q{{[0-3]+}} = or(q{{[0-3]+}},q{{[0-3]+}})
define void @test22(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = bitcast <16 x i32> %b to <512 x i1>
%2 = tail call <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1> %0, <512 x i1> %1)
store <512 x i1> %2, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
%2 = tail call <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1> %0, <64 x i1> %1)
%3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %2, i32 -1)
store <16 x i32> %3, <16 x i32>* @h, align 64
ret void
}
@ -213,9 +215,10 @@ entry:
; CHECK: q{{[0-3]+}} = not(q{{[0-3]+}})
define void @test23(<16 x i32> %a) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = tail call <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1> %0)
store <512 x i1> %1, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1> %0)
%2 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %1, i32 -1)
store <16 x i32> %2, <16 x i32>* @h, align 64
ret void
}
@ -223,10 +226,11 @@ entry:
; CHECK: q{{[0-3]+}} = xor(q{{[0-3]+}},q{{[0-3]+}})
define void @test24(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = bitcast <16 x i32> %b to <512 x i1>
%2 = tail call <512 x i1> @llvm.hexagon.V6.pred.xor(<512 x i1> %0, <512 x i1> %1)
store <512 x i1> %2, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
%2 = tail call <64 x i1> @llvm.hexagon.V6.pred.xor(<64 x i1> %0, <64 x i1> %1)
%3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %2, i32 -1)
store <16 x i32> %3, <16 x i32>* @h, align 64
ret void
}
@ -234,10 +238,11 @@ entry:
; CHECK: q{{[0-3]+}} = or(q{{[0-3]+}},!q{{[0-3]+}})
define void @test25(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = bitcast <16 x i32> %b to <512 x i1>
%2 = tail call <512 x i1> @llvm.hexagon.V6.pred.or.n(<512 x i1> %0, <512 x i1> %1)
store <512 x i1> %2, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
%2 = tail call <64 x i1> @llvm.hexagon.V6.pred.or.n(<64 x i1> %0, <64 x i1> %1)
%3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %2, i32 -1)
store <16 x i32> %3, <16 x i32>* @h, align 64
ret void
}
@ -245,10 +250,11 @@ entry:
; CHECK: q{{[0-3]+}} = and(q{{[0-3]+}},!q{{[0-3]+}})
define void @test26(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = bitcast <16 x i32> %b to <512 x i1>
%2 = tail call <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1> %0, <512 x i1> %1)
store <512 x i1> %2, <512 x i1>* bitcast (<16 x i32>* @h to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
%2 = tail call <64 x i1> @llvm.hexagon.V6.pred.and.n(<64 x i1> %0, <64 x i1> %1)
%3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %2, i32 -1)
store <16 x i32> %3, <16 x i32>* @h, align 64
ret void
}
@ -256,8 +262,9 @@ entry:
; CHECK: q{{[0-3]+}} = vcmp.gt(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub)
define void @test27(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %a, <16 x i32> %b)
store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %a, <16 x i32> %b)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}
@ -265,8 +272,9 @@ entry:
; CHECK: q{{[0-3]+}} = vcmp.gt(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
define void @test28(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = tail call <512 x i1> @llvm.hexagon.V6.vgth(<16 x i32> %a, <16 x i32> %b)
store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vgth(<16 x i32> %a, <16 x i32> %b)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}
@ -274,8 +282,9 @@ entry:
; CHECK: q{{[0-3]+}} = vcmp.eq(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
define void @test29(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = tail call <512 x i1> @llvm.hexagon.V6.veqh(<16 x i32> %a, <16 x i32> %b)
store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.veqh(<16 x i32> %a, <16 x i32> %b)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}
@ -283,8 +292,9 @@ entry:
; CHECK: q{{[0-3]+}} = vcmp.gt(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
define void @test30(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = tail call <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %a, <16 x i32> %b)
store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %a, <16 x i32> %b)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}
@ -292,8 +302,9 @@ entry:
; CHECK: q{{[0-3]+}} = vcmp.eq(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
define void @test31(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = tail call <512 x i1> @llvm.hexagon.V6.veqw(<16 x i32> %a, <16 x i32> %b)
store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.veqw(<16 x i32> %a, <16 x i32> %b)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}
@ -301,8 +312,9 @@ entry:
; CHECK: q{{[0-3]+}} = vcmp.gt(v{{[0-9]+}}.uh,v{{[0-9]+}}.uh)
define void @test32(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = tail call <512 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32> %a, <16 x i32> %b)
store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32> %a, <16 x i32> %b)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}
@ -310,8 +322,8 @@ entry:
; CHECK: v{{[0-9]+}} |= vand(q{{[0-3]+}},r{{[0-9]+}})
define void @test33(<16 x i32> %a, <16 x i32> %b, i32 %c) #0 {
entry:
%0 = bitcast <16 x i32> %b to <512 x i1>
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %a, <512 x i1> %0, i32 %c)
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %a, <64 x i1> %0, i32 %c)
store <16 x i32> %1, <16 x i32>* @h, align 64
ret void
}
@ -320,9 +332,10 @@ entry:
; CHECK: q{{[0-3]+}} |= vand(v{{[0-9]+}},r{{[0-9]+}})
define void @test34(<16 x i32> %a, <16 x i32> %b, i32 %c) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %0, <16 x i32> %b, i32 %c)
store <512 x i1> %1, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1> %0, <16 x i32> %b, i32 %c)
%2 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %1, i32 -1)
store <16 x i32> %2, <16 x i32>* @k, align 64
ret void
}
@ -330,8 +343,8 @@ entry:
; CHECK: v{{[0-9]+}} = vand(q{{[0-3]+}},r{{[0-9]+}})
define void @test35(<16 x i32> %a, i32 %b) #0 {
entry:
%0 = bitcast <16 x i32> %a to <512 x i1>
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %0, i32 %b)
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 %b)
store <16 x i32> %1, <16 x i32>* @h, align 64
ret void
}
@ -340,8 +353,9 @@ entry:
; CHECK: q{{[0-3]+}} = vand(v{{[0-9]+}},r{{[0-9]+}})
define void @test36(<16 x i32> %a, i32 %b) #0 {
entry:
%0 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 %b)
store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 %b)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}
@ -476,8 +490,9 @@ entry:
; CHECK: q{{[0-3]}} = vsetq(r{{[0-9]+}})
define void @test51(i32 %a) #0 {
entry:
%0 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a)
store <512 x i1> %0, <512 x i1>* bitcast (<16 x i32>* @k to <512 x i1>*), align 64
%0 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a)
%1 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %0, i32 -1)
store <16 x i32> %1, <16 x i32>* @k, align 64
ret void
}
@ -546,23 +561,23 @@ declare <32 x i32> @llvm.hexagon.V6.vunpackob(<32 x i32>, <16 x i32>) #0
declare <32 x i32> @llvm.hexagon.V6.vunpackoh(<32 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #0
declare <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1>, <512 x i1>) #0
declare <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1>) #0
declare <512 x i1> @llvm.hexagon.V6.pred.xor(<512 x i1>, <512 x i1>) #0
declare <512 x i1> @llvm.hexagon.V6.pred.or.n(<512 x i1>, <512 x i1>) #0
declare <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1>, <512 x i1>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgth(<16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.veqh(<16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.veqw(<16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>, i32) #0
declare <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #0
declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>) #0
declare <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1>, <64 x i1>) #0
declare <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1>) #0
declare <64 x i1> @llvm.hexagon.V6.pred.xor(<64 x i1>, <64 x i1>) #0
declare <64 x i1> @llvm.hexagon.V6.pred.or.n(<64 x i1>, <64 x i1>) #0
declare <64 x i1> @llvm.hexagon.V6.pred.and.n(<64 x i1>, <64 x i1>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgth(<16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.veqh(<16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.veqw(<16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32) #0
declare <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #0
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #0
declare i64 @llvm.hexagon.S6.rol.i.p(i64, i32) #0
declare i64 @llvm.hexagon.S6.rol.i.p.acc(i64, i64, i32) #0
declare i64 @llvm.hexagon.S6.rol.i.p.and(i64, i64, i32) #0
@ -577,7 +592,7 @@ declare i32 @llvm.hexagon.S6.rol.i.r.or(i32, i32, i32) #0
declare i32 @llvm.hexagon.S6.rol.i.r.xacc(i32, i32, i32) #0
declare i32 @llvm.hexagon.V6.extractw(<16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #0
declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #0
declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #0
declare <16 x i32> @llvm.hexagon.V6.vlutvvb(<16 x i32>, <16 x i32>, i32) #0
declare <32 x i32> @llvm.hexagon.V6.vlutvwh(<16 x i32>, <16 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vlutvvb.oracc(<16 x i32>, <16 x i32>, <16 x i32>, i32) #0
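Predicate values also compose with one another while staying in Q registers; a conversion through vandqrt is only needed when the result must be stored, as in the tests above. A minimal sketch using the intrinsics declared above (the function name @andcmp is hypothetical):

; Hypothetical illustration: AND two comparisons and select on the
; result, with no vector-register conversion in between.
define <16 x i32> @andcmp(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) {
  %q0 = tail call <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %a, <16 x i32> %b)
  %q1 = tail call <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %b, <16 x i32> %c)
  %q2 = tail call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %q0, <64 x i1> %q1)
  %r = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %q2, <16 x i32> %a, <16 x i32> %c)
  ret <16 x i32> %r
}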

View File

@ -1,15 +1,16 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
@d = external global <16 x i32>
@d = external global <16 x i32>, align 64
; CHECK-LABEL: test1:
; CHECK: q{{[0-9]}} &= vcmp.eq(v{{[0-9]+}}.b,v{{[0-9]+}}.b)
define void @test1(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.veqb.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.veqb.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -17,10 +18,11 @@ entry:
; CHECK: q{{[0-9]}} &= vcmp.eq(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
define void @test2(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.veqh.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.veqh.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -28,10 +30,11 @@ entry:
; CHECK: q{{[0-9]}} &= vcmp.eq(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
define void @test3(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.veqw.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.veqw.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -39,10 +42,11 @@ entry:
; CHECK: q{{[0-9]}} &= vcmp.gt(v{{[0-9]+}}.b,v{{[0-9]+}}.b)
define void @test4(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.vgtb.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtb.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -50,10 +54,11 @@ entry:
; CHECK: q{{[0-9]}} &= vcmp.gt(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
define void @test5(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.vgth.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.vgth.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -61,10 +66,11 @@ entry:
; CHECK: q{{[0-9]}} &= vcmp.gt(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
define void @test6(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.vgtw.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtw.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -72,10 +78,11 @@ entry:
; CHECK: q{{[0-9]}} &= vcmp.gt(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub)
define void @test7(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.vgtub.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtub.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -83,10 +90,11 @@ entry:
; CHECK: q{{[0-9]}} &= vcmp.gt(v{{[0-9]+}}.uh,v{{[0-9]+}}.uh)
define void @test8(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.vgtuh.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtuh.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -94,10 +102,11 @@ entry:
; CHECK: q{{[0-9]}} &= vcmp.gt(v{{[0-9]+}}.uw,v{{[0-9]+}}.uw)
define void @test9(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.vgtuw.and(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtuw.and(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -105,10 +114,11 @@ entry:
; CHECK: q{{[0-9]}} |= vcmp.eq(v{{[0-9]+}}.b,v{{[0-9]+}}.b)
define void @test10(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.veqb.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.veqb.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -116,10 +126,11 @@ entry:
; CHECK: q{{[0-9]}} |= vcmp.eq(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
define void @test11(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.veqh.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.veqh.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -127,10 +138,11 @@ entry:
; CHECK: q{{[0-9]}} |= vcmp.eq(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
define void @test12(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.veqw.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.veqw.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -138,10 +150,11 @@ entry:
; CHECK: q{{[0-9]}} |= vcmp.gt(v{{[0-9]+}}.b,v{{[0-9]+}}.b)
define void @test13(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.vgtb.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtb.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -149,10 +162,11 @@ entry:
; CHECK: q{{[0-9]}} |= vcmp.gt(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
define void @test14(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.vgth.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.vgth.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -160,10 +174,11 @@ entry:
; CHECK: q{{[0-9]}} |= vcmp.gt(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
define void @test15(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.vgtw.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtw.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -171,10 +186,11 @@ entry:
; CHECK: q{{[0-9]}} |= vcmp.gt(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub)
define void @test16(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.vgtub.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtub.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -182,10 +198,11 @@ entry:
; CHECK: q{{[0-9]}} |= vcmp.gt(v{{[0-9]+}}.uh,v{{[0-9]+}}.uh)
define void @test17(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.vgtuh.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtuh.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -193,10 +210,11 @@ entry:
; CHECK: q{{[0-9]}} |= vcmp.gt(v{{[0-9]+}}.uw,v{{[0-9]+}}.uw)
define void @test18(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.vgtuw.or(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtuw.or(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -204,10 +222,11 @@ entry:
; CHECK: q{{[0-9]}} ^= vcmp.eq(v{{[0-9]+}}.b,v{{[0-9]+}}.b)
define void @test19(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.veqb.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.veqb.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -215,10 +234,11 @@ entry:
; CHECK: q{{[0-9]}} ^= vcmp.eq(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
define void @test20(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.veqh.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.veqh.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -226,10 +246,11 @@ entry:
; CHECK: q{{[0-9]}} ^= vcmp.eq(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
define void @test21(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.veqw.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.veqw.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -237,10 +258,11 @@ entry:
; CHECK: q{{[0-9]}} ^= vcmp.gt(v{{[0-9]+}}.b,v{{[0-9]+}}.b)
define void @test22(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.vgtb.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtb.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -248,10 +270,11 @@ entry:
; CHECK: q{{[0-9]}} ^= vcmp.gt(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
define void @test23(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.vgth.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.vgth.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -259,10 +282,11 @@ entry:
; CHECK: q{{[0-9]}} ^= vcmp.gt(v{{[0-9]+}}.w,v{{[0-9]+}}.w)
define void @test24(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.vgtw.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtw.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -270,10 +294,11 @@ entry:
; CHECK: q{{[0-9]}} ^= vcmp.gt(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub)
define void @test25(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.vgtub.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtub.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -281,10 +306,11 @@ entry:
; CHECK: q{{[0-9]}} ^= vcmp.gt(v{{[0-9]+}}.uh,v{{[0-9]+}}.uh)
define void @test26(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.vgtuh.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtuh.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
@ -292,39 +318,42 @@ entry:
; CHECK: q{{[0-9]}} ^= vcmp.gt(v{{[0-9]+}}.uw,v{{[0-9]+}}.uw)
define void @test27(<16 x i32> %a, <16 x i32> %b) #0 {
entry:
%0 = load <512 x i1>, <512 x i1>* bitcast (<16 x i32>* @d to <512 x i1>*), align 64
%1 = tail call <512 x i1> @llvm.hexagon.V6.vgtuw.xor(<512 x i1> %0, <16 x i32> %a, <16 x i32> %b)
%2 = bitcast <512 x i1> %1 to <16 x i32>
store <16 x i32> %2, <16 x i32>* @d, align 64
%v0 = load <16 x i32>, <16 x i32>* @d, align 64
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <64 x i1> @llvm.hexagon.V6.vgtuw.xor(<64 x i1> %v1, <16 x i32> %a, <16 x i32> %b)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v2, i32 -1)
store <16 x i32> %v3, <16 x i32>* @d, align 64
ret void
}
declare <512 x i1> @llvm.hexagon.V6.veqb.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.veqh.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.veqw.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtb.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgth.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtw.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtub.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtuh.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtuw.and(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.veqb.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.veqh.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.veqw.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtb.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgth.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtw.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtub.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtuh.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtuw.or(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.veqb.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.veqh.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.veqw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtb.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgth.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtub.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtuh.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtuw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.veqb.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.veqh.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.veqw.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtb.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgth.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtw.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtub.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtuh.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtuw.and(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.veqb.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.veqh.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.veqw.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtb.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgth.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtw.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtub.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtuh.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtuw.or(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.veqb.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.veqh.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.veqw.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtb.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgth.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtw.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtub.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtuh.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtuw.xor(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #0
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #0
attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
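The .and/.or/.xor compare intrinsics above now take and return <64 x i1> predicates directly, so they chain with the vandvrt conversion declared in this file without any bitcasts. A minimal sketch of such a chain, using only declarations from this test (the function name @refine_mask and the value names are illustrative, not part of the test):

define <64 x i1> @refine_mask(<16 x i32> %m, <16 x i32> %a, <16 x i32> %b) {
  ; materialize a predicate from a vector register...
  %q0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %m, i32 -1)
  ; ...then AND in a byte-equality compare
  %q1 = tail call <64 x i1> @llvm.hexagon.V6.veqb.and(<64 x i1> %q0, <16 x i32> %a, <16 x i32> %b)
  ret <64 x i1> %q1
}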

View File

@@ -12,30 +12,32 @@
; CHECK-LABEL: V6_vmaskedstorentnq_128B
; CHECK: if (!q{{[0-3]+}}) vmem(r{{[0-9]+}}+#0):nt = v{{[0-9]+}}
declare void @llvm.hexagon.V6.vmaskedstoreq.128B(<1024 x i1>, i8*, <32 x i32>)
declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32)
declare void @llvm.hexagon.V6.vmaskedstoreq.128B(<128 x i1>, i8*, <32 x i32>)
define void @V6_vmaskedstoreq_128B( <32 x i32> %a, i8* %b, <32 x i32> %c) {
%1 = bitcast <32 x i32> %a to <1024 x i1>
call void @llvm.hexagon.V6.vmaskedstoreq.128B(<1024 x i1> %1, i8* %b, <32 x i32> %c)
%1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vmaskedstoreq.128B(<128 x i1> %1, i8* %b, <32 x i32> %c)
ret void
}
declare void @llvm.hexagon.V6.vmaskedstorenq.128B(<1024 x i1>, i8*, <32 x i32>)
declare void @llvm.hexagon.V6.vmaskedstorenq.128B(<128 x i1>, i8*, <32 x i32>)
define void @V6_vmaskedstorenq_128B( <32 x i32> %a, i8* %b, <32 x i32> %c) {
%1 = bitcast <32 x i32> %a to <1024 x i1>
call void @llvm.hexagon.V6.vmaskedstorenq.128B(<1024 x i1> %1, i8* %b, <32 x i32> %c)
%1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vmaskedstorenq.128B(<128 x i1> %1, i8* %b, <32 x i32> %c)
ret void
}
declare void @llvm.hexagon.V6.vmaskedstorentq.128B(<1024 x i1>, i8*, <32 x i32>)
declare void @llvm.hexagon.V6.vmaskedstorentq.128B(<128 x i1>, i8*, <32 x i32>)
define void @V6_vmaskedstorentq_128B( <32 x i32> %a, i8* %b, <32 x i32> %c) {
%1 = bitcast <32 x i32> %a to <1024 x i1>
call void @llvm.hexagon.V6.vmaskedstorentq.128B(<1024 x i1> %1, i8* %b, <32 x i32> %c)
%1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vmaskedstorentq.128B(<128 x i1> %1, i8* %b, <32 x i32> %c)
ret void
}
declare void @llvm.hexagon.V6.vmaskedstorentnq.128B(<1024 x i1>, i8*, <32 x i32>)
declare void @llvm.hexagon.V6.vmaskedstorentnq.128B(<128 x i1>, i8*, <32 x i32>)
define void @V6_vmaskedstorentnq_128B( <32 x i32> %a, i8* %b, <32 x i32> %c) {
%1 = bitcast <32 x i32> %a to <1024 x i1>
call void @llvm.hexagon.V6.vmaskedstorentnq.128B(<1024 x i1> %1, i8* %b, <32 x i32> %c)
%1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vmaskedstorentnq.128B(<128 x i1> %1, i8* %b, <32 x i32> %c)
ret void
}

View File

@@ -12,30 +12,32 @@
; CHECK-LABEL: V6_vmaskedstorentnq
; CHECK: if (!q{{[0-3]+}}) vmem(r{{[0-9]+}}+#0):nt = v{{[0-9]+}}
declare void @llvm.hexagon.V6.vmaskedstoreq(<512 x i1>, i8*, <16 x i32>)
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)
declare void @llvm.hexagon.V6.vmaskedstoreq(<64 x i1>, i8*, <16 x i32>)
define void @V6_vmaskedstoreq( <16 x i32> %a, i8* %b, <16 x i32> %c) {
%1 = bitcast <16 x i32> %a to <512 x i1>
call void @llvm.hexagon.V6.vmaskedstoreq(<512 x i1> %1, i8* %b, <16 x i32> %c)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vmaskedstoreq(<64 x i1> %1, i8* %b, <16 x i32> %c)
ret void
}
declare void @llvm.hexagon.V6.vmaskedstorenq(<512 x i1>, i8*, <16 x i32>)
declare void @llvm.hexagon.V6.vmaskedstorenq(<64 x i1>, i8*, <16 x i32>)
define void @V6_vmaskedstorenq( <16 x i32> %a, i8* %b, <16 x i32> %c) {
%1 = bitcast <16 x i32> %a to <512 x i1>
call void @llvm.hexagon.V6.vmaskedstorenq(<512 x i1> %1, i8* %b, <16 x i32> %c)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vmaskedstorenq(<64 x i1> %1, i8* %b, <16 x i32> %c)
ret void
}
declare void @llvm.hexagon.V6.vmaskedstorentq(<512 x i1>, i8*, <16 x i32>)
declare void @llvm.hexagon.V6.vmaskedstorentq(<64 x i1>, i8*, <16 x i32>)
define void @V6_vmaskedstorentq( <16 x i32> %a, i8* %b, <16 x i32> %c) {
%1 = bitcast <16 x i32> %a to <512 x i1>
call void @llvm.hexagon.V6.vmaskedstorentq(<512 x i1> %1, i8* %b, <16 x i32> %c)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vmaskedstorentq(<64 x i1> %1, i8* %b, <16 x i32> %c)
ret void
}
declare void @llvm.hexagon.V6.vmaskedstorentnq(<512 x i1>, i8*, <16 x i32>)
declare void @llvm.hexagon.V6.vmaskedstorentnq(<64 x i1>, i8*, <16 x i32>)
define void @V6_vmaskedstorentnq( <16 x i32> %a, i8* %b, <16 x i32> %c) {
%1 = bitcast <16 x i32> %a to <512 x i1>
call void @llvm.hexagon.V6.vmaskedstorentnq(<512 x i1> %1, i8* %b, <16 x i32> %c)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vmaskedstorentnq(<64 x i1> %1, i8* %b, <16 x i32> %c)
ret void
}
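In all four masked-store tests above, the change follows one pattern: the old bitcast of a <16 x i32> vector register to a predicate is replaced by an explicit conversion. A minimal before/after sketch of the idiom (value names illustrative):

; before this commit:
;   %q = bitcast <16 x i32> %v to <512 x i1>
; after it:
%q = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v, i32 -1)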

View File

@@ -19,6 +19,8 @@
; CHECK: if (q{{[0-3]+}}) vtmp.h = vgather(r1,m{{[0-9]+}},v{{[0-9]+}}:{{[0-9]+}}.w).h
; CHECK: vmem(r{{[0-9]+}}+#0) = vtmp.new
declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32)
declare void @llvm.hexagon.V6.vgathermw.128B(i8*, i32, i32, <32 x i32>)
define void @V6_vgathermw_128B(i8* %a, i32 %b, i32 %c, <32 x i32> %d) {
call void @llvm.hexagon.V6.vgathermw.128B(i8* %a, i32 %b, i32 %c, <32 x i32> %d)
@@ -37,24 +39,24 @@ define void @V6_vgathermhw_128B(i8* %a, i32 %b, i32 %c, <64 x i32> %d) {
ret void
}
declare void @llvm.hexagon.V6.vgathermwq.128B(i8*, <1024 x i1>, i32, i32, <32 x i32>)
declare void @llvm.hexagon.V6.vgathermwq.128B(i8*, <128 x i1>, i32, i32, <32 x i32>)
define void @V6_vgathermwq_128B(i8* %a, <32 x i32> %b, i32 %c, i32 %d, <32 x i32> %e) {
%1 = bitcast <32 x i32> %b to <1024 x i1>
call void @llvm.hexagon.V6.vgathermwq.128B(i8* %a, <1024 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
%1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %b, i32 -1)
call void @llvm.hexagon.V6.vgathermwq.128B(i8* %a, <128 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
ret void
}
declare void @llvm.hexagon.V6.vgathermhq.128B(i8*, <1024 x i1>, i32, i32, <32 x i32>)
declare void @llvm.hexagon.V6.vgathermhq.128B(i8*, <128 x i1>, i32, i32, <32 x i32>)
define void @V6_vgathermhq_128B(i8* %a, <32 x i32> %b, i32 %c, i32 %d, <32 x i32> %e) {
%1 = bitcast <32 x i32> %b to <1024 x i1>
call void @llvm.hexagon.V6.vgathermhq.128B(i8* %a, <1024 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
%1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %b, i32 -1)
call void @llvm.hexagon.V6.vgathermhq.128B(i8* %a, <128 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
ret void
}
declare void @llvm.hexagon.V6.vgathermhwq.128B(i8*, <1024 x i1>, i32, i32, <64 x i32>)
declare void @llvm.hexagon.V6.vgathermhwq.128B(i8*, <128 x i1>, i32, i32, <64 x i32>)
define void @V6_vgathermhwq_128B(i8* %a, <32 x i32> %b, i32 %c, i32 %d, <64 x i32> %e) {
%1 = bitcast <32 x i32> %b to <1024 x i1>
call void @llvm.hexagon.V6.vgathermhwq.128B(i8* %a, <1024 x i1> %1, i32 %c, i32 %d, <64 x i32> %e)
%1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %b, i32 -1)
call void @llvm.hexagon.V6.vgathermhwq.128B(i8* %a, <128 x i1> %1, i32 %c, i32 %d, <64 x i32> %e)
ret void
}

View File

@@ -19,6 +19,8 @@
; CHECK: if (q{{[0-3]+}}) vtmp.h = vgather(r1,m{{[0-9]+}},v{{[0-9]+}}:{{[0-9]+}}.w).h
; CHECK: vmem(r{{[0-9]+}}+#0) = vtmp.new
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)
declare void @llvm.hexagon.V6.vgathermw(i8*, i32, i32, <16 x i32>)
define void @V6_vgathermw(i8* %a, i32 %b, i32 %c, <16 x i32> %d) {
call void @llvm.hexagon.V6.vgathermw(i8* %a, i32 %b, i32 %c, <16 x i32> %d)
@@ -37,23 +39,23 @@ define void @V6_vgathermhw(i8* %a, i32 %b, i32 %c, <32 x i32> %d) {
ret void
}
declare void @llvm.hexagon.V6.vgathermwq(i8*, <512 x i1>, i32, i32, <16 x i32>)
declare void @llvm.hexagon.V6.vgathermwq(i8*, <64 x i1>, i32, i32, <16 x i32>)
define void @V6_vgathermwq(i8* %a, <16 x i32> %b, i32 %c, i32 %d, <16 x i32> %e) {
%1 = bitcast <16 x i32> %b to <512 x i1>
call void @llvm.hexagon.V6.vgathermwq(i8* %a, <512 x i1> %1, i32 %c, i32 %d, <16 x i32> %e)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
call void @llvm.hexagon.V6.vgathermwq(i8* %a, <64 x i1> %1, i32 %c, i32 %d, <16 x i32> %e)
ret void
}
declare void @llvm.hexagon.V6.vgathermhq(i8*, <512 x i1>, i32, i32, <16 x i32>)
declare void @llvm.hexagon.V6.vgathermhq(i8*, <64 x i1>, i32, i32, <16 x i32>)
define void @V6_vgathermhq(i8* %a, <16 x i32> %b, i32 %c, i32 %d, <16 x i32> %e) {
%1 = bitcast <16 x i32> %b to <512 x i1>
call void @llvm.hexagon.V6.vgathermhq(i8* %a, <512 x i1> %1, i32 %c, i32 %d, <16 x i32> %e)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
call void @llvm.hexagon.V6.vgathermhq(i8* %a, <64 x i1> %1, i32 %c, i32 %d, <16 x i32> %e)
ret void
}
declare void @llvm.hexagon.V6.vgathermhwq(i8*, <512 x i1>, i32, i32, <32 x i32>)
declare void @llvm.hexagon.V6.vgathermhwq(i8*, <64 x i1>, i32, i32, <32 x i32>)
define void @V6_vgathermhwq(i8* %a, <16 x i32> %b, i32 %c, i32 %d, <32 x i32> %e) {
%1 = bitcast <16 x i32> %b to <512 x i1>
call void @llvm.hexagon.V6.vgathermhwq(i8* %a, <512 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %b, i32 -1)
call void @llvm.hexagon.V6.vgathermhwq(i8* %a, <64 x i1> %1, i32 %c, i32 %d, <32 x i32> %e)
ret void
}

View File

@@ -19,6 +19,7 @@
; CHECK-LABEL: V6_vscattermhwq_128B
; CHECK: if (q{{[0-3]}}) vscatter(r{{[0-9]+}},m{{[0-9]+}},v{{[0-9]+}}:{{[0-9]+}}.w).h = v{{[0-9]+}}
declare <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32>, i32)
declare void @llvm.hexagon.V6.vscattermw.128B(i32, i32, <32 x i32>, <32 x i32>)
define void @V6_vscattermw_128B(i32 %a, i32 %b, <32 x i32> %c, <32 x i32> %d) {
@@ -44,17 +45,17 @@ define void @V6_vscattermh_add_128B(i32 %a, i32 %b, <32 x i32> %c, <32 x i32> %d
ret void
}
declare void @llvm.hexagon.V6.vscattermwq.128B(<1024 x i1>, i32, i32, <32 x i32>, <32 x i32>)
declare void @llvm.hexagon.V6.vscattermwq.128B(<128 x i1>, i32, i32, <32 x i32>, <32 x i32>)
define void @V6_vscattermwq_128B(<32 x i32> %a, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e) {
%1 = bitcast <32 x i32> %a to <1024 x i1>
call void @llvm.hexagon.V6.vscattermwq.128B(<1024 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e)
%1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vscattermwq.128B(<128 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e)
ret void
}
declare void @llvm.hexagon.V6.vscattermhq.128B(<1024 x i1>, i32, i32, <32 x i32>, <32 x i32>)
declare void @llvm.hexagon.V6.vscattermhq.128B(<128 x i1>, i32, i32, <32 x i32>, <32 x i32>)
define void @V6_vscattermhq_128B(<32 x i32> %a, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e) {
%1 = bitcast <32 x i32> %a to <1024 x i1>
call void @llvm.hexagon.V6.vscattermhq.128B(<1024 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e)
%1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vscattermhq.128B(<128 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <32 x i32> %e)
ret void
}
@@ -70,9 +71,9 @@ define void @V6_vscattermhw_add_128B(i32 %a, i32 %b, <64 x i32> %c, <32 x i32> %
ret void
}
declare void @llvm.hexagon.V6.vscattermhwq.128B(<1024 x i1>, i32, i32, <64 x i32>, <32 x i32>)
declare void @llvm.hexagon.V6.vscattermhwq.128B(<128 x i1>, i32, i32, <64 x i32>, <32 x i32>)
define void @V6_vscattermhwq_128B(<32 x i32> %a, i32 %b, i32 %c, <64 x i32> %d, <32 x i32> %e) {
%1 = bitcast <32 x i32> %a to <1024 x i1>
call void @llvm.hexagon.V6.vscattermhwq.128B(<1024 x i1> %1, i32 %b, i32 %c, <64 x i32> %d, <32 x i32> %e)
%1 = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vscattermhwq.128B(<128 x i1> %1, i32 %b, i32 %c, <64 x i32> %d, <32 x i32> %e)
ret void
}

View File

@@ -19,6 +19,7 @@
; CHECK-LABEL: V6_vscattermhwq
; CHECK: if (q{{[0-3]}}) vscatter(r{{[0-9]+}},m{{[0-9]+}},v{{[0-9]+}}:{{[0-9]+}}.w).h = v{{[0-9]+}}
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)
declare void @llvm.hexagon.V6.vscattermw(i32, i32, <16 x i32>, <16 x i32>)
define void @V6_vscattermw(i32 %a, i32 %b, <16 x i32> %c, <16 x i32> %d) {
@@ -44,17 +45,17 @@ define void @V6_vscattermh_add(i32 %a, i32 %b, <16 x i32> %c, <16 x i32> %d) {
ret void
}
declare void @llvm.hexagon.V6.vscattermwq(<512 x i1>, i32, i32, <16 x i32>, <16 x i32>)
declare void @llvm.hexagon.V6.vscattermwq(<64 x i1>, i32, i32, <16 x i32>, <16 x i32>)
define void @V6_vscattermwq(<16 x i32> %a, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e) {
%1 = bitcast <16 x i32> %a to <512 x i1>
call void @llvm.hexagon.V6.vscattermwq(<512 x i1> %1, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vscattermwq(<64 x i1> %1, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e)
ret void
}
declare void @llvm.hexagon.V6.vscattermhq(<512 x i1>, i32, i32, <16 x i32>, <16 x i32>)
declare void @llvm.hexagon.V6.vscattermhq(<64 x i1>, i32, i32, <16 x i32>, <16 x i32>)
define void @V6_vscattermhq(<16 x i32> %a, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e) {
%1 = bitcast <16 x i32> %a to <512 x i1>
call void @llvm.hexagon.V6.vscattermhq(<512 x i1> %1, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vscattermhq(<64 x i1> %1, i32 %b, i32 %c, <16 x i32> %d, <16 x i32> %e)
ret void
}
@@ -70,9 +71,9 @@ define void @V6_vscattermhw_add(i32 %a, i32 %b, <32 x i32> %c, <16 x i32> %d) {
ret void
}
declare void @llvm.hexagon.V6.vscattermhwq(<512 x i1>, i32, i32, <32 x i32>, <16 x i32>)
declare void @llvm.hexagon.V6.vscattermhwq(<64 x i1>, i32, i32, <32 x i32>, <16 x i32>)
define void @V6_vscattermhwq(<16 x i32> %a, i32 %b, i32 %c, <32 x i32> %d, <16 x i32> %e) {
%1 = bitcast <16 x i32> %a to <512 x i1>
call void @llvm.hexagon.V6.vscattermhwq(<512 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <16 x i32> %e)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %a, i32 -1)
call void @llvm.hexagon.V6.vscattermhwq(<64 x i1> %1, i32 %b, i32 %c, <32 x i32> %d, <16 x i32> %e)
ret void
}

View File

@@ -136,21 +136,21 @@ define <16 x i32> @V6_vmpyuhe(<16 x i32> %a, i32 %b) {
}
; CHECK: = vmpye(v0.uh,r0.uh)
;declare <16 x i32> @llvm.hexagon.V6.vprefixqb(<512 x i1>)
;define <16 x i32> @V6_vprefixqb(<512 x i1> %a) {
; %b = call <16 x i32> @llvm.hexagon.V6.vprefixqb(<512 x i1> %a)
;declare <16 x i32> @llvm.hexagon.V6.vprefixqb(<64 x i1>)
;define <16 x i32> @V6_vprefixqb(<64 x i1> %a) {
; %b = call <16 x i32> @llvm.hexagon.V6.vprefixqb(<64 x i1> %a)
; ret <16 x i32> %b
;}
;declare <16 x i32> @llvm.hexagon.V6.vprefixqh(<512 x i1>)
;define <16 x i32> @V6_vprefixqh(<512 x i1> %a) {
; %b = call <16 x i32> @llvm.hexagon.V6.vprefixqh(<512 x i1> %a)
;declare <16 x i32> @llvm.hexagon.V6.vprefixqh(<64 x i1>)
;define <16 x i32> @V6_vprefixqh(<64 x i1> %a) {
; %b = call <16 x i32> @llvm.hexagon.V6.vprefixqh(<64 x i1> %a)
; ret <16 x i32> %b
;}
;declare <16 x i32> @llvm.hexagon.V6.vprefixqw(<512 x i1>)
;define <16 x i32> @V6_vprefixqw(<512 x i1> %a) {
; %b = call <16 x i32> @llvm.hexagon.V6.vprefixqw(<512 x i1> %a)
;declare <16 x i32> @llvm.hexagon.V6.vprefixqw(<64 x i1>)
;define <16 x i32> @V6_vprefixqw(<64 x i1> %a) {
; %b = call <16 x i32> @llvm.hexagon.V6.vprefixqw(<64 x i1> %a)
; ret <16 x i32> %b
;}

View File

@@ -28,10 +28,10 @@ b0:
%v13 = tail call i32 @llvm.hexagon.S2.vsplatrb(i32 %v12)
%v14 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 %v13)
%v15 = tail call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %v14)
%v16 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v5)
%v16 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v5)
%v17 = shl i32 1, %v8
%v18 = tail call i32 @llvm.hexagon.S2.vsplatrb(i32 %v17)
%v19 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v15, <512 x i1> %v16, i32 %v18)
%v19 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v15, <64 x i1> %v16, i32 %v18)
%v20 = tail call i32 @llvm.hexagon.S2.vsplatrb(i32 %a3)
%v21 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 %v20)
%v22 = icmp sgt i32 %v5, 0
@@ -48,8 +48,8 @@ b1: ; preds = %b0
%v30 = getelementptr inbounds i8, i8* %a0, i32 %v29
%v31 = bitcast i8* %v30 to <16 x i32>*
%v32 = load <16 x i32>, <16 x i32>* %v31, align 64, !tbaa !0
%v33 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a5)
%v34 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %v33, i32 16843009)
%v33 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a5)
%v34 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v33, i32 16843009)
%v35 = tail call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %v34)
%v36 = add i32 %v0, %a5
%v37 = getelementptr inbounds i8, i8* %a0, i32 %v36
@@ -127,11 +127,11 @@ b4: ; preds = %b4, %b3
%v100 = tail call <16 x i32> @llvm.hexagon.V6.vminub(<16 x i32> %v94, <16 x i32> %v91)
%v101 = tail call <16 x i32> @llvm.hexagon.V6.vminub(<16 x i32> %v97, <16 x i32> %v99)
%v102 = tail call <16 x i32> @llvm.hexagon.V6.vmaxub(<16 x i32> %v98, <16 x i32> %v100)
%v103 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v101, <16 x i32> %v96)
%v104 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v95, <16 x i32> %v102)
%v105 = tail call <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1> %v103, <512 x i1> %v104)
%v103 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v101, <16 x i32> %v96)
%v104 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v95, <16 x i32> %v102)
%v105 = tail call <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1> %v103, <64 x i1> %v104)
%v106 = tail call i32 @llvm.hexagon.S6.rol.i.r(i32 %v83, i32 1)
%v107 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v86, <512 x i1> %v105, i32 %v106)
%v107 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v86, <64 x i1> %v105, i32 %v106)
%v108 = add nsw i32 %v79, -64
%v109 = icmp sgt i32 %v79, 64
br i1 %v109, label %b4, label %b5
@@ -179,16 +179,16 @@ declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
declare <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.hexagon.S2.vsplatrb(i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vd0() #1
@@ -212,10 +212,10 @@ declare <16 x i32> @llvm.hexagon.V6.vmaxub(<16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vminub(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1>, <512 x i1>) #1
declare <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1>, <64 x i1>) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.hexagon.S6.rol.i.r(i32, i32) #1

View File

@@ -15,35 +15,35 @@ b0:
br i1 %v0, label %b1, label %b2
b1: ; preds = %b0
%v1 = tail call <1024 x i1> @llvm.hexagon.V6.pred.not.128B(<1024 x i1> undef) #2
%v1 = tail call <128 x i1> @llvm.hexagon.V6.pred.not.128B(<128 x i1> undef) #2
br label %b2
b2: ; preds = %b1, %b0
%v2 = phi <1024 x i1> [ %v1, %b1 ], [ undef, %b0 ]
%v2 = phi <128 x i1> [ %v1, %b1 ], [ undef, %b0 ]
br label %b3
b3: ; preds = %b3, %b2
%v3 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %v2, <32 x i32> undef, <32 x i32> undef) #2
%v3 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %v2, <32 x i32> undef, <32 x i32> undef) #2
%v4 = tail call <32 x i32> @llvm.hexagon.V6.vor.128B(<32 x i32> undef, <32 x i32> %v3) #2
%v5 = tail call <32 x i32> @llvm.hexagon.V6.vor.128B(<32 x i32> %v4, <32 x i32> undef) #2
%v6 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v5, <32 x i32> undef) #2
%v7 = tail call <1024 x i1> @llvm.hexagon.V6.pred.or.128B(<1024 x i1> %v6, <1024 x i1> undef) #2
%v8 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %v7, <32 x i32> undef, <32 x i32> undef) #2
tail call void asm sideeffect "if($0) vmem($1)=$2;", "q,r,v,~{memory}"(<32 x i32> undef, <32 x i32>* undef, <32 x i32> %v8) #2
%v6 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v5, <32 x i32> undef) #2
%v7 = tail call <128 x i1> @llvm.hexagon.V6.pred.or.128B(<128 x i1> %v6, <128 x i1> undef) #2
%v8 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %v7, <32 x i32> undef, <32 x i32> undef) #2
tail call void asm sideeffect "if($0) vmem($1)=$2;", "q,r,v,~{memory}"(<128 x i1> undef, <32 x i32>* undef, <32 x i32> %v8) #2
br label %b3
}
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
declare <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32>, <32 x i32>) #1
declare <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
declare <1024 x i1> @llvm.hexagon.V6.pred.or.128B(<1024 x i1>, <1024 x i1>) #1
declare <128 x i1> @llvm.hexagon.V6.pred.or.128B(<128 x i1>, <128 x i1>) #1
; Function Attrs: nounwind readnone
declare <1024 x i1> @llvm.hexagon.V6.pred.not.128B(<1024 x i1>) #1
declare <128 x i1> @llvm.hexagon.V6.pred.not.128B(<128 x i1>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vor.128B(<32 x i32>, <32 x i32>) #1

View File

@@ -25,36 +25,36 @@ b3: ; preds = %b3, %b2
%v7 = load <16 x i32>, <16 x i32>* %v6, align 64, !tbaa !0
%v8 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> undef, <16 x i32> %v7, i32 4)
%v9 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v8, <16 x i32> zeroinitializer)
%v10 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v9, <16 x i32> undef)
%v11 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v10, <16 x i32> undef, <16 x i32> undef)
%v12 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> zeroinitializer, <16 x i32> %v11, <16 x i32> undef)
%v13 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> undef, <16 x i32> %v12, <16 x i32> undef)
%v10 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v9, <16 x i32> undef)
%v11 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v10, <16 x i32> undef, <16 x i32> undef)
%v12 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> zeroinitializer, <16 x i32> %v11, <16 x i32> undef)
%v13 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> undef, <16 x i32> %v12, <16 x i32> undef)
%v14 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> undef, <16 x i32> undef, i32 1)
%v15 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v14, <16 x i32> zeroinitializer)
%v16 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> zeroinitializer, <16 x i32> zeroinitializer)
%v17 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> zeroinitializer, <16 x i32> undef)
%v18 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v15, <16 x i32> undef)
%v19 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> zeroinitializer, <16 x i32> undef)
%v20 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v16, <16 x i32> undef)
%v21 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v19, <16 x i32> undef, <16 x i32> zeroinitializer)
%v22 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v20, <16 x i32> undef, <16 x i32> zeroinitializer)
%v17 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> zeroinitializer, <16 x i32> undef)
%v18 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v15, <16 x i32> undef)
%v19 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> zeroinitializer, <16 x i32> undef)
%v20 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v16, <16 x i32> undef)
%v21 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v19, <16 x i32> undef, <16 x i32> zeroinitializer)
%v22 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v20, <16 x i32> undef, <16 x i32> zeroinitializer)
%v23 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v22, <16 x i32> %v21)
%v24 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> zeroinitializer, <32 x i32> %v23, i32 16843009)
%v25 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v17, <16 x i32> %v13, <16 x i32> undef)
%v26 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v18, <16 x i32> %v25, <16 x i32> undef)
%v27 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v19, <16 x i32> %v26, <16 x i32> undef)
%v28 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v20, <16 x i32> %v27, <16 x i32> undef)
%v25 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v17, <16 x i32> %v13, <16 x i32> undef)
%v26 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v18, <16 x i32> %v25, <16 x i32> undef)
%v27 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v19, <16 x i32> %v26, <16 x i32> undef)
%v28 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v20, <16 x i32> %v27, <16 x i32> undef)
%v29 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> undef, <16 x i32> zeroinitializer)
%v30 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> zeroinitializer, <16 x i32> zeroinitializer)
%v31 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> undef, <16 x i32> undef)
%v32 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v29, <16 x i32> undef)
%v33 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> undef)
%v31 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> undef, <16 x i32> undef)
%v32 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v29, <16 x i32> undef)
%v33 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> undef)
%v34 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v24, <32 x i32> zeroinitializer, i32 16843009)
%v35 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v34, <32 x i32> undef, i32 16843009)
%v36 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> zeroinitializer, <16 x i32> %v28, <16 x i32> undef)
%v37 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v31, <16 x i32> %v36, <16 x i32> undef)
%v38 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v32, <16 x i32> %v37, <16 x i32> undef)
%v39 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v33, <16 x i32> %v38, <16 x i32> undef)
%v36 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> zeroinitializer, <16 x i32> %v28, <16 x i32> undef)
%v37 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v31, <16 x i32> %v36, <16 x i32> undef)
%v38 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v32, <16 x i32> %v37, <16 x i32> undef)
%v39 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v33, <16 x i32> %v38, <16 x i32> undef)
%v40 = add nsw i32 %v3, 3
%v41 = icmp eq i32 %v40, 5
br i1 %v41, label %b4, label %b3
@@ -85,13 +85,13 @@ declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #1

View File

@@ -28,13 +28,13 @@ declare i32 @printf(i8*, ...) #0
declare void @print_vecpred(i32, i8*) #0
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
; Function Attrs: nounwind
declare void @init_vectors() #0
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
@@ -43,7 +43,7 @@ declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
declare void @init_addresses() #0
; Function Attrs: nounwind
declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind
define i32 @main() #0 {
@@ -63,13 +63,13 @@ entry:
%7 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
%call1381 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str1, i32 0, i32 0), i8* getelementptr inbounds ([43 x i8], [43 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([77 x i8], [77 x i8]* @.str251, i32 0, i32 0)) #3
%8 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
%9 = call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %8, i32 16843009)
%9 = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %8, i32 16843009)
call void @print_vector(i32 64, i8* bitcast (<16 x i32>* @VectorResult to i8*))
%10 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
%11 = call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %10, i32 16843009)
%12 = bitcast <512 x i1> %11 to <16 x i32>
%13 = bitcast <16 x i32> %12 to <512 x i1>
%14 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1> %13, <16 x i32> undef, <16 x i32> undef)
%11 = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %10, i32 16843009)
%12 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %11, i32 -1)
%13 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %12, i32 -1)
%14 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1> %13, <16 x i32> undef, <16 x i32> undef)
store <16 x i32> %14, <16 x i32>* @VectorResult, align 64
ret i32 0
}
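The two back-to-back bitcasts in the old version of main become an explicit round trip through a vector register. A sketch of that round trip, assuming a <64 x i1> predicate %q is already in hand (value names illustrative):

%v = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %q, i32 -1)
%q2 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v, i32 -1)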

View File

@@ -114,12 +114,12 @@ b4: ; preds = %b3
%v91 = tail call <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32> %v90, i32 1)
%v92 = tail call <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32> %v91, i32 1)
%v93 = tail call <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32> %v92, i32 1)
%v94 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> undef, <16 x i32> undef, <16 x i32> %v93)
%v95 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> undef, <16 x i32> %v94, <16 x i32> undef)
%v94 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1> undef, <16 x i32> undef, <16 x i32> %v93)
%v95 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1> undef, <16 x i32> %v94, <16 x i32> undef)
%v96 = tail call <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32> undef, i32 1)
%v97 = tail call <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %v96, <16 x i32> %v95)
%v98 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> %v97, <16 x i32> undef, <16 x i32> undef)
%v99 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> undef, <16 x i32> undef, <16 x i32> undef)
%v97 = tail call <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %v96, <16 x i32> %v95)
%v98 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1> %v97, <16 x i32> undef, <16 x i32> undef)
%v99 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1> undef, <16 x i32> undef, <16 x i32> undef)
%v100 = tail call <16 x i32> @llvm.hexagon.V6.vshufeh(<16 x i32> %v99, <16 x i32> %v98)
%v101 = tail call <16 x i32> @llvm.hexagon.V6.vshuffeb(<16 x i32> %v100, <16 x i32> undef)
%v102 = getelementptr inbounds <16 x i32>, <16 x i32>* %v2, i32 1
@@ -183,13 +183,13 @@ declare <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vadduhw(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32>, i32) #1

View File

@@ -82,14 +82,14 @@ entry:
%asmresult58 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %1, 29
%asmresult59 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %1, 30
%asmresult60 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %1, 31
%2 = tail call { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } asm "nop", "=q,=q,=q,=q"() #1
%asmresult61 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %2, 0
%asmresult62 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %2, 1
%asmresult63 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %2, 2
%asmresult64 = extractvalue { <16 x i32>, <16 x i32>, <16 x i32>, <16 x i32> } %2, 3
%3 = tail call <16 x i32> asm "nop", "=q,q,q,q,q"(<16 x i32> %asmresult61, <16 x i32> %asmresult62, <16 x i32> %asmresult63, <16 x i32> %asmresult64) #1
tail call void asm sideeffect "nop", "q,q,q"(<16 x i32> %asmresult61, <16 x i32> %asmresult62, <16 x i32> %asmresult63) #2
tail call void asm sideeffect "nop", "q,q"(<16 x i32> %asmresult64, <16 x i32> %3) #2
%2 = tail call { <64 x i1>, <64 x i1>, <64 x i1>, <64 x i1> } asm "nop", "=q,=q,=q,=q"() #1
%asmresult61 = extractvalue { <64 x i1>, <64 x i1>, <64 x i1>, <64 x i1> } %2, 0
%asmresult62 = extractvalue { <64 x i1>, <64 x i1>, <64 x i1>, <64 x i1> } %2, 1
%asmresult63 = extractvalue { <64 x i1>, <64 x i1>, <64 x i1>, <64 x i1> } %2, 2
%asmresult64 = extractvalue { <64 x i1>, <64 x i1>, <64 x i1>, <64 x i1> } %2, 3
%3 = tail call <64 x i1> asm "nop", "=q,q,q,q,q"(<64 x i1> %asmresult61, <64 x i1> %asmresult62, <64 x i1> %asmresult63, <64 x i1> %asmresult64) #1
tail call void asm sideeffect "nop", "q,q,q"(<64 x i1> %asmresult61, <64 x i1> %asmresult62, <64 x i1> %asmresult63) #2
tail call void asm sideeffect "nop", "q,q"(<64 x i1> %asmresult64, <64 x i1> %3) #2
tail call void asm sideeffect "nop", "v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v,v"(<16 x i32> %asmresult29, <16 x i32> %asmresult30, <16 x i32> %asmresult31, <16 x i32> %asmresult32, <16 x i32> %asmresult33, <16 x i32> %asmresult34, <16 x i32> %asmresult35, <16 x i32> %asmresult36, <16 x i32> %asmresult37, <16 x i32> %asmresult38, <16 x i32> %asmresult39, <16 x i32> %asmresult40, <16 x i32> %asmresult41, <16 x i32> %asmresult42, <16 x i32> %asmresult43, <16 x i32> %asmresult44, <16 x i32> %asmresult45, <16 x i32> %asmresult46, <16 x i32> %asmresult47, <16 x i32> %asmresult48, <16 x i32> %asmresult49, <16 x i32> %asmresult50, <16 x i32> %asmresult51, <16 x i32> %asmresult52, <16 x i32> %asmresult53, <16 x i32> %asmresult54, <16 x i32> %asmresult55, <16 x i32> %asmresult56, <16 x i32> %asmresult57, <16 x i32> %asmresult58, <16 x i32> %asmresult59, <16 x i32> %asmresult60) #2
tail call void asm sideeffect "nop", "r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r"(i32 %asmresult, i32 %asmresult1, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7, i32 %asmresult8, i32 %asmresult9, i32 %asmresult10, i32 %asmresult11, i32 %asmresult12, i32 %asmresult13, i32 %asmresult14, i32 %asmresult15, i32 %asmresult16, i32 %asmresult17, i32 %asmresult18, i32 %asmresult19, i32 %asmresult20, i32 %asmresult21, i32 %asmresult22, i32 %asmresult23, i32 %asmresult24, i32 %asmresult25, i32 %asmresult26, i32 %asmresult27, i32 %asmresult28) #2
ret void
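With v64i1 legal, inline-asm "q" constraints bind HVX predicate registers to <64 x i1> values instead of <16 x i32>, as the rewritten asm calls above show. A minimal sketch of the constraint change (asm body and value names illustrative):

%q = tail call <64 x i1> asm "nop", "=q"()
tail call void asm sideeffect "nop", "q"(<64 x i1> %q)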

View File

@@ -32,7 +32,7 @@ b7: ; preds = %b6
br label %b8
b8: ; preds = %b7
%v0 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> undef, i32 -1)
%v0 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> undef, i32 -1)
br i1 undef, label %b9, label %b11
b9: ; preds = %b8
@@ -42,9 +42,9 @@ b10: ; preds = %b12
br label %b11
b11: ; preds = %b10, %b8
%v1 = phi <512 x i1> [ %v0, %b8 ], [ undef, %b10 ]
%v2 = tail call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %v1, <512 x i1> undef)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1> %v2, <16 x i32> undef, <16 x i32> undef)
%v1 = phi <64 x i1> [ %v0, %b8 ], [ undef, %b10 ]
%v2 = tail call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %v1, <64 x i1> undef)
%v3 = tail call <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1> %v2, <16 x i32> undef, <16 x i32> undef)
%v4 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> undef, <16 x i32> %v3, i32 undef)
%v5 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %v4, <16 x i32> undef, i32 undef)
%v6 = tail call <16 x i32> @llvm.hexagon.V6.vand(<16 x i32> %v5, <16 x i32> undef)
@@ -53,9 +53,9 @@ b11: ; preds = %b10, %b8
%v9 = tail call <32 x i32> @llvm.hexagon.V6.vshufoeb(<16 x i32> undef, <16 x i32> %v8)
%v10 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v9)
%v11 = tail call <16 x i32> @llvm.hexagon.V6.vor(<16 x i32> %v10, <16 x i32> undef)
%v12 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v11, i32 -1)
%v13 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %v12, i32 undef)
tail call void @llvm.hexagon.V6.vmaskedstoreq(<512 x i1> undef, i8* undef, <16 x i32> %v13)
%v12 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v11, i32 -1)
%v13 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v12, i32 undef)
tail call void @llvm.hexagon.V6.vmaskedstoreq(<64 x i1> undef, i8* undef, <16 x i32> %v13)
unreachable
b12: ; preds = %b12, %b9
@@ -69,22 +69,22 @@ b13: ; preds = %b5
}
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #1
declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32>, <16 x i32>, i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
; Function Attrs: argmemonly nounwind
declare void @llvm.hexagon.V6.vmaskedstoreq(<512 x i1>, i8*, <16 x i32>) #2
declare void @llvm.hexagon.V6.vmaskedstoreq(<64 x i1>, i8*, <16 x i32>) #2
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vor(<16 x i32>, <16 x i32>) #1

View File

@@ -25,9 +25,9 @@ b3: ; preds = %b3, %b2
%v2 = phi i32 [ 0, %b2 ], [ %v8, %b3 ]
%v3 = phi <32 x i32> [ zeroinitializer, %b2 ], [ %v0, %b3 ]
%v4 = phi <32 x i32> [ %v1, %b2 ], [ %v7, %b3 ]
%v5 = tail call <1024 x i1> @llvm.hexagon.V6.vgtuh.128B(<32 x i32> %v3, <32 x i32> undef)
%v6 = tail call <1024 x i1> @llvm.hexagon.V6.veqh.and.128B(<1024 x i1> %v5, <32 x i32> undef, <32 x i32> undef)
%v7 = tail call <32 x i32> @llvm.hexagon.V6.vaddhq.128B(<1024 x i1> %v6, <32 x i32> %v4, <32 x i32> undef)
%v5 = tail call <128 x i1> @llvm.hexagon.V6.vgtuh.128B(<32 x i32> %v3, <32 x i32> undef)
%v6 = tail call <128 x i1> @llvm.hexagon.V6.veqh.and.128B(<128 x i1> %v5, <32 x i32> undef, <32 x i32> undef)
%v7 = tail call <32 x i32> @llvm.hexagon.V6.vaddhq.128B(<128 x i1> %v6, <32 x i32> %v4, <32 x i32> undef)
%v8 = add nsw i32 %v2, 1
%v9 = icmp slt i32 %v8, %a2
br i1 %v9, label %b3, label %b4
@@ -40,13 +40,13 @@ b5: ; preds = %b4, %b0
}
; Function Attrs: nounwind readnone
declare <1024 x i1> @llvm.hexagon.V6.vgtuh.128B(<32 x i32>, <32 x i32>) #1
declare <128 x i1> @llvm.hexagon.V6.vgtuh.128B(<32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
declare <1024 x i1> @llvm.hexagon.V6.veqh.and.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
declare <128 x i1> @llvm.hexagon.V6.veqh.and.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vaddhq.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vaddhq.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #1

View File

@@ -17,9 +17,9 @@ declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #0
declare <16 x i32> @llvm.hexagon.V6.vd0() #0
declare <32 x i32> @llvm.hexagon.V6.vsububh(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32>, <16 x i32>) #0
declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #0
declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #0
declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #0
declare <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32>, <32 x i32>, i32) #0
declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #0
@@ -106,48 +106,48 @@ b6: ; preds = %b6, %b5
%v53 = tail call <32 x i32> @llvm.hexagon.V6.vsububh(<16 x i32> %v8, <16 x i32> %v47) #2
%v54 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v45, <16 x i32> %v47) #2
%v55 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v49, <16 x i32> %v47) #2
%v56 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v54, <16 x i32> %v7) #2
%v57 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v55, <16 x i32> %v7) #2
%v58 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v56, <16 x i32> %v9, <16 x i32> %v10) #2
%v59 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v57, <16 x i32> %v58, <16 x i32> %v9) #2
%v60 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v56, <16 x i32> %v8, <16 x i32> %v45) #2
%v61 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v57, <16 x i32> %v8, <16 x i32> %v49) #2
%v56 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v54, <16 x i32> %v7) #2
%v57 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v55, <16 x i32> %v7) #2
%v58 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v56, <16 x i32> %v9, <16 x i32> %v10) #2
%v59 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v57, <16 x i32> %v58, <16 x i32> %v9) #2
%v60 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v56, <16 x i32> %v8, <16 x i32> %v45) #2
%v61 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v57, <16 x i32> %v8, <16 x i32> %v49) #2
%v62 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v61, <16 x i32> %v60) #2
%v63 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v53, <32 x i32> %v62, i32 -1) #2
%v64 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v45, <16 x i32> %v44, i32 1) #2
%v65 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v49, <16 x i32> %v48, i32 1) #2
%v66 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v64, <16 x i32> %v47) #2
%v67 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v65, <16 x i32> %v47) #2
%v68 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v7) #2
%v69 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v67, <16 x i32> %v7) #2
%v70 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v68, <16 x i32> %v59, <16 x i32> %v9) #2
%v71 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v69, <16 x i32> %v70, <16 x i32> %v9) #2
%v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v68, <16 x i32> %v8, <16 x i32> %v64) #2
%v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v69, <16 x i32> %v8, <16 x i32> %v65) #2
%v68 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v7) #2
%v69 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v67, <16 x i32> %v7) #2
%v70 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v68, <16 x i32> %v59, <16 x i32> %v9) #2
%v71 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v69, <16 x i32> %v70, <16 x i32> %v9) #2
%v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v68, <16 x i32> %v8, <16 x i32> %v64) #2
%v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v69, <16 x i32> %v8, <16 x i32> %v65) #2
%v74 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v73, <16 x i32> %v72) #2
%v75 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v63, <32 x i32> %v74, i32 -1) #2
%v76 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v50, <16 x i32> %v45, i32 1) #2
%v77 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v52, <16 x i32> %v49, i32 1) #2
%v78 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v76, <16 x i32> %v47) #2
%v79 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v77, <16 x i32> %v47) #2
%v80 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v78, <16 x i32> %v7) #2
%v81 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v79, <16 x i32> %v7) #2
%v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v80, <16 x i32> %v71, <16 x i32> %v9) #2
%v83 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v81, <16 x i32> %v82, <16 x i32> %v9) #2
%v84 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v80, <16 x i32> %v8, <16 x i32> %v76) #2
%v85 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v81, <16 x i32> %v8, <16 x i32> %v77) #2
%v80 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v78, <16 x i32> %v7) #2
%v81 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v79, <16 x i32> %v7) #2
%v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v80, <16 x i32> %v71, <16 x i32> %v9) #2
%v83 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v81, <16 x i32> %v82, <16 x i32> %v9) #2
%v84 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v80, <16 x i32> %v8, <16 x i32> %v76) #2
%v85 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v81, <16 x i32> %v8, <16 x i32> %v77) #2
%v86 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v85, <16 x i32> %v84) #2
%v87 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v75, <32 x i32> %v86, i32 -1) #2
%v88 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v47, <16 x i32> %v46, i32 1) #2
%v89 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v51, <16 x i32> %v47, i32 1) #2
%v90 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v88, <16 x i32> %v47) #2
%v91 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v89, <16 x i32> %v47) #2
%v92 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v90, <16 x i32> %v7) #2
%v93 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v91, <16 x i32> %v7) #2
%v94 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v92, <16 x i32> %v83, <16 x i32> %v9) #2
%v95 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v93, <16 x i32> %v94, <16 x i32> %v9) #2
%v96 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v92, <16 x i32> %v8, <16 x i32> %v88) #2
%v97 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v93, <16 x i32> %v8, <16 x i32> %v89) #2
%v92 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v90, <16 x i32> %v7) #2
%v93 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v91, <16 x i32> %v7) #2
%v94 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v92, <16 x i32> %v83, <16 x i32> %v9) #2
%v95 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v93, <16 x i32> %v94, <16 x i32> %v9) #2
%v96 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v92, <16 x i32> %v8, <16 x i32> %v88) #2
%v97 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v93, <16 x i32> %v8, <16 x i32> %v89) #2
%v98 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v97, <16 x i32> %v96) #2
%v99 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v87, <32 x i32> %v98, i32 -1) #2
%v100 = tail call <32 x i32> @llvm.hexagon.V6.vlutvwh(<16 x i32> %v95, <16 x i32> %v4, i32 0) #2

View File

@@ -12,8 +12,9 @@ b0:
store i32 %a0, i32* %v0, align 4
store <16 x i32> %a1, <16 x i32>* %v1, align 64
%v3 = load i32, i32* %v0, align 4
%v4 = load <16 x i32>, <16 x i32>* %v2, align 64
call void asm sideeffect " $1 = vsetq($0);\0A", "r,q"(i32 %v3, <16 x i32> %v4) #1, !srcloc !0
%v4 = tail call <64 x i1> asm sideeffect " $0 = vsetq($1);\0A", "=q,r"(i32 %v3) #1, !srcloc !0
%v5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v4, i32 -1)
store <16 x i32> %v5, <16 x i32>* %v2, align 64
ret void
}
@@ -23,7 +24,9 @@ b0:
ret i32 0
}
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
attributes #1 = { nounwind }
attributes #1 = { nounwind readnone }
!0 = !{i32 222}
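Because predicate values cannot be stored directly, the rewritten vsetq test obtains the predicate from the asm via "=q" and then routes it through vandqrt before the vector store. The same pattern in isolation (value names %n and %p illustrative):

%q = tail call <64 x i1> asm sideeffect " $0 = vsetq($1);\0A", "=q,r"(i32 %n)
%v = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %q, i32 -1)
store <16 x i32> %v, <16 x i32>* %p, align 64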

View File

@@ -44,10 +44,10 @@ b3: ; preds = %b3, %b2
%v28 = bitcast i8* %v27 to <16 x i32>*
%v29 = load <16 x i32>, <16 x i32>* %v28, align 64, !tbaa !0
%v30 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v25, <16 x i32> %v14)
%v31 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> %v1)
%v32 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v31, <16 x i32> %v3, <16 x i32> %v25)
%v31 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> %v1)
%v32 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v31, <16 x i32> %v3, <16 x i32> %v25)
%v33 = tail call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %v16, <16 x i32> %v32, i32 16843009)
%v34 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v31, <16 x i32> %v17, <16 x i32> %v2)
%v34 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v31, <16 x i32> %v17, <16 x i32> %v2)
%v35 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 1)
%v36 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v29, <16 x i32> %v25, i32 1)
%v37 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 2)
@@ -56,22 +56,22 @@ b3: ; preds = %b3, %b2
%v40 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v36, <16 x i32> %v14)
%v41 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v37, <16 x i32> %v14)
%v42 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v38, <16 x i32> %v14)
%v43 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v39, <16 x i32> %v1)
%v44 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v40, <16 x i32> %v1)
%v45 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v41, <16 x i32> %v1)
%v46 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v42, <16 x i32> %v1)
%v47 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v43, <16 x i32> %v3, <16 x i32> %v35)
%v48 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v44, <16 x i32> %v3, <16 x i32> %v36)
%v49 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v45, <16 x i32> %v3, <16 x i32> %v37)
%v50 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v46, <16 x i32> %v3, <16 x i32> %v38)
%v43 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v39, <16 x i32> %v1)
%v44 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v40, <16 x i32> %v1)
%v45 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v41, <16 x i32> %v1)
%v46 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v42, <16 x i32> %v1)
%v47 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v43, <16 x i32> %v3, <16 x i32> %v35)
%v48 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v44, <16 x i32> %v3, <16 x i32> %v36)
%v49 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v45, <16 x i32> %v3, <16 x i32> %v37)
%v50 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v46, <16 x i32> %v3, <16 x i32> %v38)
%v51 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v48, <16 x i32> %v47)
%v52 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v33, <32 x i32> %v51, i32 16843009)
%v53 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v50, <16 x i32> %v49)
%v54 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v52, <32 x i32> %v53, i32 16843009)
%v55 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v43, <16 x i32> %v34, <16 x i32> %v2)
%v56 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v44, <16 x i32> %v55, <16 x i32> %v2)
%v57 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v45, <16 x i32> %v56, <16 x i32> %v2)
%v58 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v46, <16 x i32> %v57, <16 x i32> %v2)
%v55 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v43, <16 x i32> %v34, <16 x i32> %v2)
%v56 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v44, <16 x i32> %v55, <16 x i32> %v2)
%v57 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v45, <16 x i32> %v56, <16 x i32> %v2)
%v58 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v46, <16 x i32> %v57, <16 x i32> %v2)
%v59 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 3)
%v60 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v29, <16 x i32> %v25, i32 3)
%v61 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 4)
@@ -80,22 +80,22 @@ b3: ; preds = %b3, %b2
%v64 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v60, <16 x i32> %v14)
%v65 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v61, <16 x i32> %v14)
%v66 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v62, <16 x i32> %v14)
%v67 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v63, <16 x i32> %v1)
%v68 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v64, <16 x i32> %v1)
%v69 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v65, <16 x i32> %v1)
%v70 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v1)
%v71 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v67, <16 x i32> %v3, <16 x i32> %v59)
%v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v68, <16 x i32> %v3, <16 x i32> %v60)
%v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v69, <16 x i32> %v3, <16 x i32> %v61)
%v74 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v70, <16 x i32> %v3, <16 x i32> %v62)
%v67 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v63, <16 x i32> %v1)
%v68 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v64, <16 x i32> %v1)
%v69 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v65, <16 x i32> %v1)
%v70 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v1)
%v71 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v67, <16 x i32> %v3, <16 x i32> %v59)
%v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v68, <16 x i32> %v3, <16 x i32> %v60)
%v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v69, <16 x i32> %v3, <16 x i32> %v61)
%v74 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v70, <16 x i32> %v3, <16 x i32> %v62)
%v75 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v72, <16 x i32> %v71)
%v76 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v54, <32 x i32> %v75, i32 16843009)
%v77 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v74, <16 x i32> %v73)
%v78 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v76, <32 x i32> %v77, i32 16843009)
%v79 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v67, <16 x i32> %v58, <16 x i32> %v2)
%v80 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v68, <16 x i32> %v79, <16 x i32> %v2)
%v81 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v69, <16 x i32> %v80, <16 x i32> %v2)
%v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v70, <16 x i32> %v81, <16 x i32> %v2)
%v79 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v67, <16 x i32> %v58, <16 x i32> %v2)
%v80 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v68, <16 x i32> %v79, <16 x i32> %v2)
%v81 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v69, <16 x i32> %v80, <16 x i32> %v2)
%v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v70, <16 x i32> %v81, <16 x i32> %v2)
%v83 = add nsw i32 %v15, 1
%v84 = icmp eq i32 %v83, 5
br i1 %v84, label %b4, label %b3
@@ -147,16 +147,16 @@ declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32>, <16 x i32>, i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #1
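For reference, the predicate flow this test exercises now looks as follows in 64-byte mode; a minimal sketch using hypothetical values %a, %b and accumulator %acc (not taken from the diff):

  %q = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %a, <16 x i32> %b)                    ; unsigned byte compare -> predicate
  %m = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %q, <16 x i32> %a, <16 x i32> %b)      ; predicated select
  %s = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %q, <16 x i32> %acc, <16 x i32> %b) ; add bytes in lanes where %q is false

Only the predicate type changes from <512 x i1> to <64 x i1>; the vector operands and immediates are untouched.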


@@ -28,7 +28,7 @@ b3: ; preds = %b2
b4: ; preds = %b4, %b3
%v3 = phi <32 x i32> [ %v5, %b4 ], [ undef, %b3 ]
%v4 = tail call <32 x i32> @llvm.hexagon.V6.vsubhnq.128B(<1024 x i1> undef, <32 x i32> undef, <32 x i32> %v3) #2
%v4 = tail call <32 x i32> @llvm.hexagon.V6.vsubhnq.128B(<128 x i1> undef, <32 x i32> undef, <32 x i32> %v3) #2
%v5 = tail call <32 x i32> @llvm.hexagon.V6.vavguh.128B(<32 x i32> %v3, <32 x i32> %v2) #2
br label %b4
@@ -43,7 +43,7 @@ declare void @f1(i8* nocapture readonly, i8* nocapture readonly, i8* nocapture,
declare <32 x i32> @llvm.hexagon.V6.vd0.128B() #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vsubhnq.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vsubhnq.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vavguh.128B(<32 x i32>, <32 x i32>) #1
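In 128-byte mode the same rule applies with the doubled types: predicates become <128 x i1> and vector registers <32 x i32>. A minimal sketch, assuming a hypothetical input %v:

  %q = tail call <128 x i1> @llvm.hexagon.V6.vandvrt.128B(<32 x i32> %v, i32 -1)                       ; vector -> predicate
  %d = tail call <32 x i32> @llvm.hexagon.V6.vsubhnq.128B(<128 x i1> %q, <32 x i32> %v, <32 x i32> %v) ; subtract halfwords where %q is false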


@@ -32,76 +32,76 @@ b0:
%v2 = call <16 x i32> @llvm.hexagon.V6.vd0()
store <16 x i32> %v2, <16 x i32>* @g2, align 64
%v3 = load <16 x i32>, <16 x i32>* @g3, align 64
%v4 = bitcast <16 x i32> %v3 to <512 x i1>
%v4 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v3, i32 -1)
%v5 = load <16 x i32>, <16 x i32>* @g2, align 64
%v6 = load <16 x i32>, <16 x i32>* @g1, align 64
%v7 = call <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1> %v4, <16 x i32> %v5, <16 x i32> %v6)
%v7 = call <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1> %v4, <16 x i32> %v5, <16 x i32> %v6)
store <16 x i32> %v7, <16 x i32>* @g2, align 64
%v8 = load <16 x i32>, <16 x i32>* @g3, align 64
%v9 = bitcast <16 x i32> %v8 to <512 x i1>
%v9 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v8, i32 -1)
%v10 = load <16 x i32>, <16 x i32>* @g2, align 64
%v11 = load <16 x i32>, <16 x i32>* @g1, align 64
%v12 = call <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1> %v9, <16 x i32> %v10, <16 x i32> %v11)
%v12 = call <16 x i32> @llvm.hexagon.V6.vsubbq(<64 x i1> %v9, <16 x i32> %v10, <16 x i32> %v11)
store <16 x i32> %v12, <16 x i32>* @g2, align 64
%v13 = load <16 x i32>, <16 x i32>* @g3, align 64
%v14 = bitcast <16 x i32> %v13 to <512 x i1>
%v14 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v13, i32 -1)
%v15 = load <16 x i32>, <16 x i32>* @g2, align 64
%v16 = load <16 x i32>, <16 x i32>* @g1, align 64
%v17 = call <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1> %v14, <16 x i32> %v15, <16 x i32> %v16)
%v17 = call <16 x i32> @llvm.hexagon.V6.vaddhq(<64 x i1> %v14, <16 x i32> %v15, <16 x i32> %v16)
store <16 x i32> %v17, <16 x i32>* @g2, align 64
%v18 = load <16 x i32>, <16 x i32>* @g3, align 64
%v19 = bitcast <16 x i32> %v18 to <512 x i1>
%v19 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v18, i32 -1)
%v20 = load <16 x i32>, <16 x i32>* @g2, align 64
%v21 = load <16 x i32>, <16 x i32>* @g1, align 64
%v22 = call <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1> %v19, <16 x i32> %v20, <16 x i32> %v21)
%v22 = call <16 x i32> @llvm.hexagon.V6.vsubhq(<64 x i1> %v19, <16 x i32> %v20, <16 x i32> %v21)
store <16 x i32> %v22, <16 x i32>* @g2, align 64
%v23 = load <16 x i32>, <16 x i32>* @g3, align 64
%v24 = bitcast <16 x i32> %v23 to <512 x i1>
%v24 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v23, i32 -1)
%v25 = load <16 x i32>, <16 x i32>* @g2, align 64
%v26 = load <16 x i32>, <16 x i32>* @g1, align 64
%v27 = call <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1> %v24, <16 x i32> %v25, <16 x i32> %v26)
%v27 = call <16 x i32> @llvm.hexagon.V6.vaddwq(<64 x i1> %v24, <16 x i32> %v25, <16 x i32> %v26)
store <16 x i32> %v27, <16 x i32>* @g2, align 64
%v28 = load <16 x i32>, <16 x i32>* @g3, align 64
%v29 = bitcast <16 x i32> %v28 to <512 x i1>
%v29 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v28, i32 -1)
%v30 = load <16 x i32>, <16 x i32>* @g2, align 64
%v31 = load <16 x i32>, <16 x i32>* @g1, align 64
%v32 = call <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1> %v29, <16 x i32> %v30, <16 x i32> %v31)
%v32 = call <16 x i32> @llvm.hexagon.V6.vsubwq(<64 x i1> %v29, <16 x i32> %v30, <16 x i32> %v31)
store <16 x i32> %v32, <16 x i32>* @g2, align 64
%v33 = load <16 x i32>, <16 x i32>* @g3, align 64
%v34 = bitcast <16 x i32> %v33 to <512 x i1>
%v34 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v33, i32 -1)
%v35 = load <16 x i32>, <16 x i32>* @g2, align 64
%v36 = load <16 x i32>, <16 x i32>* @g1, align 64
%v37 = call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v34, <16 x i32> %v35, <16 x i32> %v36)
%v37 = call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v34, <16 x i32> %v35, <16 x i32> %v36)
store <16 x i32> %v37, <16 x i32>* @g2, align 64
%v38 = load <16 x i32>, <16 x i32>* @g3, align 64
%v39 = bitcast <16 x i32> %v38 to <512 x i1>
%v39 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v38, i32 -1)
%v40 = load <16 x i32>, <16 x i32>* @g2, align 64
%v41 = load <16 x i32>, <16 x i32>* @g1, align 64
%v42 = call <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1> %v39, <16 x i32> %v40, <16 x i32> %v41)
%v42 = call <16 x i32> @llvm.hexagon.V6.vsubbnq(<64 x i1> %v39, <16 x i32> %v40, <16 x i32> %v41)
store <16 x i32> %v42, <16 x i32>* @g2, align 64
%v43 = load <16 x i32>, <16 x i32>* @g3, align 64
%v44 = bitcast <16 x i32> %v43 to <512 x i1>
%v44 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v43, i32 -1)
%v45 = load <16 x i32>, <16 x i32>* @g2, align 64
%v46 = load <16 x i32>, <16 x i32>* @g1, align 64
%v47 = call <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1> %v44, <16 x i32> %v45, <16 x i32> %v46)
%v47 = call <16 x i32> @llvm.hexagon.V6.vaddhnq(<64 x i1> %v44, <16 x i32> %v45, <16 x i32> %v46)
store <16 x i32> %v47, <16 x i32>* @g2, align 64
%v48 = load <16 x i32>, <16 x i32>* @g3, align 64
%v49 = bitcast <16 x i32> %v48 to <512 x i1>
%v49 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v48, i32 -1)
%v50 = load <16 x i32>, <16 x i32>* @g2, align 64
%v51 = load <16 x i32>, <16 x i32>* @g1, align 64
%v52 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1> %v49, <16 x i32> %v50, <16 x i32> %v51)
%v52 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1> %v49, <16 x i32> %v50, <16 x i32> %v51)
store <16 x i32> %v52, <16 x i32>* @g2, align 64
%v53 = load <16 x i32>, <16 x i32>* @g3, align 64
%v54 = bitcast <16 x i32> %v53 to <512 x i1>
%v54 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v53, i32 -1)
%v55 = load <16 x i32>, <16 x i32>* @g2, align 64
%v56 = load <16 x i32>, <16 x i32>* @g1, align 64
%v57 = call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> %v54, <16 x i32> %v55, <16 x i32> %v56)
%v57 = call <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1> %v54, <16 x i32> %v55, <16 x i32> %v56)
store <16 x i32> %v57, <16 x i32>* @g2, align 64
%v58 = load <16 x i32>, <16 x i32>* @g3, align 64
%v59 = bitcast <16 x i32> %v58 to <512 x i1>
%v59 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v58, i32 -1)
%v60 = load <16 x i32>, <16 x i32>* @g2, align 64
%v61 = load <16 x i32>, <16 x i32>* @g1, align 64
%v62 = call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> %v59, <16 x i32> %v60, <16 x i32> %v61)
%v62 = call <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1> %v59, <16 x i32> %v60, <16 x i32> %v61)
store <16 x i32> %v62, <16 x i32>* @g2, align 64
ret i32 0
}
@@ -110,40 +110,43 @@ b0:
declare <16 x i32> @llvm.hexagon.V6.vd0() #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubbq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddhq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubhq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddwq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubwq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
attributes #1 = { nounwind readnone }
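The old/new pairs above show the mechanical rewrite applied throughout this file: a bitcast between <16 x i32> and the predicate type is replaced by the explicit conversion intrinsic. Schematically, for a hypothetical vector %v:

  ; before: %q = bitcast <16 x i32> %v to <512 x i1>
  ; after:
  %q = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v, i32 -1)

The reverse direction, predicate back to vector, goes through vandqrt, as in the tests further below.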


@@ -48,10 +48,10 @@ b3: ; preds = %b3, %b2
%v28 = bitcast i8* %v27 to <16 x i32>*
%v29 = load <16 x i32>, <16 x i32>* %v28, align 64, !tbaa !0
%v30 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v25, <16 x i32> %v14)
%v31 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> %v1)
%v32 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v31, <16 x i32> %v3, <16 x i32> %v25)
%v31 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> %v1)
%v32 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v31, <16 x i32> %v3, <16 x i32> %v25)
%v33 = tail call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %v16, <16 x i32> %v32, i32 16843009)
%v34 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v31, <16 x i32> %v17, <16 x i32> %v2)
%v34 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v31, <16 x i32> %v17, <16 x i32> %v2)
%v35 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 1)
%v36 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v29, <16 x i32> %v25, i32 1)
%v37 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 2)
@@ -60,22 +60,22 @@ b3: ; preds = %b3, %b2
%v40 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v36, <16 x i32> %v14)
%v41 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v37, <16 x i32> %v14)
%v42 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v38, <16 x i32> %v14)
%v43 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v39, <16 x i32> %v1)
%v44 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v40, <16 x i32> %v1)
%v45 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v41, <16 x i32> %v1)
%v46 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v42, <16 x i32> %v1)
%v47 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v43, <16 x i32> %v3, <16 x i32> %v35)
%v48 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v44, <16 x i32> %v3, <16 x i32> %v36)
%v49 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v45, <16 x i32> %v3, <16 x i32> %v37)
%v50 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v46, <16 x i32> %v3, <16 x i32> %v38)
%v43 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v39, <16 x i32> %v1)
%v44 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v40, <16 x i32> %v1)
%v45 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v41, <16 x i32> %v1)
%v46 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v42, <16 x i32> %v1)
%v47 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v43, <16 x i32> %v3, <16 x i32> %v35)
%v48 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v44, <16 x i32> %v3, <16 x i32> %v36)
%v49 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v45, <16 x i32> %v3, <16 x i32> %v37)
%v50 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v46, <16 x i32> %v3, <16 x i32> %v38)
%v51 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v48, <16 x i32> %v47)
%v52 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v33, <32 x i32> %v51, i32 16843009)
%v53 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v50, <16 x i32> %v49)
%v54 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v52, <32 x i32> %v53, i32 16843009)
%v55 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v43, <16 x i32> %v34, <16 x i32> %v2)
%v56 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v44, <16 x i32> %v55, <16 x i32> %v2)
%v57 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v45, <16 x i32> %v56, <16 x i32> %v2)
%v58 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v46, <16 x i32> %v57, <16 x i32> %v2)
%v55 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v43, <16 x i32> %v34, <16 x i32> %v2)
%v56 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v44, <16 x i32> %v55, <16 x i32> %v2)
%v57 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v45, <16 x i32> %v56, <16 x i32> %v2)
%v58 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v46, <16 x i32> %v57, <16 x i32> %v2)
%v59 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 3)
%v60 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v29, <16 x i32> %v25, i32 3)
%v61 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 4)
@@ -84,22 +84,22 @@ b3: ; preds = %b3, %b2
%v64 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v60, <16 x i32> %v14)
%v65 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v61, <16 x i32> %v14)
%v66 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v62, <16 x i32> %v14)
%v67 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v63, <16 x i32> %v1)
%v68 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v64, <16 x i32> %v1)
%v69 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v65, <16 x i32> %v1)
%v70 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v1)
%v71 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v67, <16 x i32> %v3, <16 x i32> %v59)
%v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v68, <16 x i32> %v3, <16 x i32> %v60)
%v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v69, <16 x i32> %v3, <16 x i32> %v61)
%v74 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v70, <16 x i32> %v3, <16 x i32> %v62)
%v67 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v63, <16 x i32> %v1)
%v68 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v64, <16 x i32> %v1)
%v69 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v65, <16 x i32> %v1)
%v70 = tail call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v1)
%v71 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v67, <16 x i32> %v3, <16 x i32> %v59)
%v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v68, <16 x i32> %v3, <16 x i32> %v60)
%v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v69, <16 x i32> %v3, <16 x i32> %v61)
%v74 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v70, <16 x i32> %v3, <16 x i32> %v62)
%v75 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v72, <16 x i32> %v71)
%v76 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v54, <32 x i32> %v75, i32 16843009)
%v77 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v74, <16 x i32> %v73)
%v78 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v76, <32 x i32> %v77, i32 16843009)
%v79 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v67, <16 x i32> %v58, <16 x i32> %v2)
%v80 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v68, <16 x i32> %v79, <16 x i32> %v2)
%v81 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v69, <16 x i32> %v80, <16 x i32> %v2)
%v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v70, <16 x i32> %v81, <16 x i32> %v2)
%v79 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v67, <16 x i32> %v58, <16 x i32> %v2)
%v80 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v68, <16 x i32> %v79, <16 x i32> %v2)
%v81 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v69, <16 x i32> %v80, <16 x i32> %v2)
%v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %v70, <16 x i32> %v81, <16 x i32> %v2)
%v83 = add nsw i32 %v15, 1
%v84 = icmp eq i32 %v83, 5
br i1 %v84, label %b4, label %b3
@@ -151,16 +151,16 @@ declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32>, <16 x i32>, i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #1


@@ -14,8 +14,8 @@ entry:
%add = add i32 %sub, %rem
%2 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 -1)
%3 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
%4 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %add)
%5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %3, <512 x i1> %4, i32 12)
%4 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %add)
%5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %3, <64 x i1> %4, i32 12)
%and4 = and i32 %add, 511
%cmp = icmp eq i32 %and4, 0
%sMaskR.0 = select i1 %cmp, <16 x i32> %2, <16 x i32> %5
@@ -23,8 +23,8 @@ entry:
br i1 %cmp547, label %for.body.lr.ph, label %for.end
for.body.lr.ph: ; preds = %entry
%6 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %boundary)
%7 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %6, i32 16843009)
%6 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %boundary)
%7 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %6, i32 16843009)
%8 = tail call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %7)
%9 = add i32 %rem, %xsize
%10 = add i32 %9, -1
@@ -59,9 +59,9 @@ for.end: ; preds = %for.cond.for.end_cr
}
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vand(<16 x i32>, <16 x i32>) #1
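The scalar-to-predicate path follows the same pattern: pred.scalar2 now returns <64 x i1>, which vandqrt and vandqrt.acc consume directly. A minimal sketch with a hypothetical scalar %n, reusing the immediates from this test:

  %q = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %n)                                ; scalar -> predicate
  %v = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %q, i32 16843009)                ; predicate -> vector
  %w = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v, <64 x i1> %q, i32 12)   ; accumulating form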


@@ -14,8 +14,8 @@ b0:
%v4 = add i32 %v2, %v3
%v5 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 -1)
%v6 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
%v7 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v4)
%v8 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v6, <512 x i1> %v7, i32 12)
%v7 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v4)
%v8 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %v6, <64 x i1> %v7, i32 12)
%v9 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v8, <16 x i32> %v8)
%v10 = and i32 %v4, 511
%v11 = icmp eq i32 %v10, 0
@@ -31,8 +31,8 @@ b2: ; preds = %b1, %b0
br i1 %v14, label %b3, label %b6
b3: ; preds = %b2
%v15 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a5)
%v16 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %v15, i32 16843009)
%v15 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a5)
%v16 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v15, i32 16843009)
%v17 = tail call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %v16)
%v18 = add i32 %v3, %a1
%v19 = add i32 %v18, -1
@@ -71,16 +71,16 @@ b6: ; preds = %b5, %b2
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32>) #1


@@ -372,291 +372,291 @@ entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval, align 4
%0 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%1 = bitcast <16 x i32> %0 to <512 x i1>
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %0, i32 -1)
%2 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
%3 = bitcast <16 x i32> %2 to <512 x i1>
%4 = call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %1, <512 x i1> %3)
%5 = bitcast <512 x i1> %4 to <16 x i32>
%3 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %2, i32 -1)
%4 = call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %1, <64 x i1> %3)
%5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %4, i32 -1)
store volatile <16 x i32> %5, <16 x i32>* @Q6VecPredResult, align 64
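; Vector predicates can no longer be loaded or stored directly, so each test
; below follows the same shape: load the <16 x i32> masks, convert them with
; vandvrt(..., i32 -1), apply the predicate intrinsic, then materialize the
; <64 x i1> result with vandqrt(..., i32 -1) before storing it.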
%6 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%7 = bitcast <16 x i32> %6 to <512 x i1>
%7 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %6, i32 -1)
%8 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
%9 = bitcast <16 x i32> %8 to <512 x i1>
%10 = call <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1> %7, <512 x i1> %9)
%11 = bitcast <512 x i1> %10 to <16 x i32>
%9 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %8, i32 -1)
%10 = call <64 x i1> @llvm.hexagon.V6.pred.and.n(<64 x i1> %7, <64 x i1> %9)
%11 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %10, i32 -1)
store volatile <16 x i32> %11, <16 x i32>* @Q6VecPredResult, align 64
%12 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%13 = bitcast <16 x i32> %12 to <512 x i1>
%14 = call <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1> %13)
%15 = bitcast <512 x i1> %14 to <16 x i32>
%13 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %12, i32 -1)
%14 = call <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1> %13)
%15 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %14, i32 -1)
store volatile <16 x i32> %15, <16 x i32>* @Q6VecPredResult, align 64
%16 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%17 = bitcast <16 x i32> %16 to <512 x i1>
%17 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %16, i32 -1)
%18 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
%19 = bitcast <16 x i32> %18 to <512 x i1>
%20 = call <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1> %17, <512 x i1> %19)
%21 = bitcast <512 x i1> %20 to <16 x i32>
%19 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %18, i32 -1)
%20 = call <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1> %17, <64 x i1> %19)
%21 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %20, i32 -1)
store volatile <16 x i32> %21, <16 x i32>* @Q6VecPredResult, align 64
%22 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%23 = bitcast <16 x i32> %22 to <512 x i1>
%23 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %22, i32 -1)
%24 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
%25 = bitcast <16 x i32> %24 to <512 x i1>
%26 = call <512 x i1> @llvm.hexagon.V6.pred.or.n(<512 x i1> %23, <512 x i1> %25)
%27 = bitcast <512 x i1> %26 to <16 x i32>
%25 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %24, i32 -1)
%26 = call <64 x i1> @llvm.hexagon.V6.pred.or.n(<64 x i1> %23, <64 x i1> %25)
%27 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %26, i32 -1)
store volatile <16 x i32> %27, <16 x i32>* @Q6VecPredResult, align 64
%28 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%29 = call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %28, i32 -1)
%30 = bitcast <512 x i1> %29 to <16 x i32>
%29 = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %28, i32 -1)
%30 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %29, i32 -1)
store volatile <16 x i32> %30, <16 x i32>* @Q6VecPredResult, align 64
%31 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%32 = bitcast <16 x i32> %31 to <512 x i1>
%32 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %31, i32 -1)
%33 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%34 = call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %32, <16 x i32> %33, i32 -1)
%35 = bitcast <512 x i1> %34 to <16 x i32>
%34 = call <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1> %32, <16 x i32> %33, i32 -1)
%35 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %34, i32 -1)
store volatile <16 x i32> %35, <16 x i32>* @Q6VecPredResult, align 64
%36 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%37 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%38 = call <512 x i1> @llvm.hexagon.V6.veqb(<16 x i32> %36, <16 x i32> %37)
%39 = bitcast <512 x i1> %38 to <16 x i32>
%38 = call <64 x i1> @llvm.hexagon.V6.veqb(<16 x i32> %36, <16 x i32> %37)
%39 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %38, i32 -1)
store volatile <16 x i32> %39, <16 x i32>* @Q6VecPredResult, align 64
%40 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%41 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%42 = call <512 x i1> @llvm.hexagon.V6.veqh(<16 x i32> %40, <16 x i32> %41)
%43 = bitcast <512 x i1> %42 to <16 x i32>
%42 = call <64 x i1> @llvm.hexagon.V6.veqh(<16 x i32> %40, <16 x i32> %41)
%43 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %42, i32 -1)
store volatile <16 x i32> %43, <16 x i32>* @Q6VecPredResult, align 64
%44 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%45 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%46 = call <512 x i1> @llvm.hexagon.V6.veqw(<16 x i32> %44, <16 x i32> %45)
%47 = bitcast <512 x i1> %46 to <16 x i32>
%46 = call <64 x i1> @llvm.hexagon.V6.veqw(<16 x i32> %44, <16 x i32> %45)
%47 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %46, i32 -1)
store volatile <16 x i32> %47, <16 x i32>* @Q6VecPredResult, align 64
%48 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%49 = bitcast <16 x i32> %48 to <512 x i1>
%49 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %48, i32 -1)
%50 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%51 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%52 = call <512 x i1> @llvm.hexagon.V6.veqb.and(<512 x i1> %49, <16 x i32> %50, <16 x i32> %51)
%53 = bitcast <512 x i1> %52 to <16 x i32>
%52 = call <64 x i1> @llvm.hexagon.V6.veqb.and(<64 x i1> %49, <16 x i32> %50, <16 x i32> %51)
%53 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %52, i32 -1)
store volatile <16 x i32> %53, <16 x i32>* @Q6VecPredResult, align 64
%54 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%55 = bitcast <16 x i32> %54 to <512 x i1>
%55 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %54, i32 -1)
%56 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%57 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%58 = call <512 x i1> @llvm.hexagon.V6.veqh.and(<512 x i1> %55, <16 x i32> %56, <16 x i32> %57)
%59 = bitcast <512 x i1> %58 to <16 x i32>
%58 = call <64 x i1> @llvm.hexagon.V6.veqh.and(<64 x i1> %55, <16 x i32> %56, <16 x i32> %57)
%59 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %58, i32 -1)
store volatile <16 x i32> %59, <16 x i32>* @Q6VecPredResult, align 64
%60 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%61 = bitcast <16 x i32> %60 to <512 x i1>
%61 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %60, i32 -1)
%62 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%63 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%64 = call <512 x i1> @llvm.hexagon.V6.veqw.and(<512 x i1> %61, <16 x i32> %62, <16 x i32> %63)
%65 = bitcast <512 x i1> %64 to <16 x i32>
%64 = call <64 x i1> @llvm.hexagon.V6.veqw.and(<64 x i1> %61, <16 x i32> %62, <16 x i32> %63)
%65 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %64, i32 -1)
store volatile <16 x i32> %65, <16 x i32>* @Q6VecPredResult, align 64
%66 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%67 = bitcast <16 x i32> %66 to <512 x i1>
%67 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %66, i32 -1)
%68 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%69 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%70 = call <512 x i1> @llvm.hexagon.V6.veqb.or(<512 x i1> %67, <16 x i32> %68, <16 x i32> %69)
%71 = bitcast <512 x i1> %70 to <16 x i32>
%70 = call <64 x i1> @llvm.hexagon.V6.veqb.or(<64 x i1> %67, <16 x i32> %68, <16 x i32> %69)
%71 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %70, i32 -1)
store volatile <16 x i32> %71, <16 x i32>* @Q6VecPredResult, align 64
%72 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%73 = bitcast <16 x i32> %72 to <512 x i1>
%73 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %72, i32 -1)
%74 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%75 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%76 = call <512 x i1> @llvm.hexagon.V6.veqh.or(<512 x i1> %73, <16 x i32> %74, <16 x i32> %75)
%77 = bitcast <512 x i1> %76 to <16 x i32>
%76 = call <64 x i1> @llvm.hexagon.V6.veqh.or(<64 x i1> %73, <16 x i32> %74, <16 x i32> %75)
%77 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %76, i32 -1)
store volatile <16 x i32> %77, <16 x i32>* @Q6VecPredResult, align 64
%78 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%79 = bitcast <16 x i32> %78 to <512 x i1>
%79 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %78, i32 -1)
%80 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%81 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%82 = call <512 x i1> @llvm.hexagon.V6.veqw.or(<512 x i1> %79, <16 x i32> %80, <16 x i32> %81)
%83 = bitcast <512 x i1> %82 to <16 x i32>
%82 = call <64 x i1> @llvm.hexagon.V6.veqw.or(<64 x i1> %79, <16 x i32> %80, <16 x i32> %81)
%83 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %82, i32 -1)
store volatile <16 x i32> %83, <16 x i32>* @Q6VecPredResult, align 64
%84 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%85 = bitcast <16 x i32> %84 to <512 x i1>
%85 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %84, i32 -1)
%86 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%87 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%88 = call <512 x i1> @llvm.hexagon.V6.veqb.xor(<512 x i1> %85, <16 x i32> %86, <16 x i32> %87)
%89 = bitcast <512 x i1> %88 to <16 x i32>
%88 = call <64 x i1> @llvm.hexagon.V6.veqb.xor(<64 x i1> %85, <16 x i32> %86, <16 x i32> %87)
%89 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %88, i32 -1)
store volatile <16 x i32> %89, <16 x i32>* @Q6VecPredResult, align 64
%90 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%91 = bitcast <16 x i32> %90 to <512 x i1>
%91 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %90, i32 -1)
%92 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%93 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%94 = call <512 x i1> @llvm.hexagon.V6.veqh.xor(<512 x i1> %91, <16 x i32> %92, <16 x i32> %93)
%95 = bitcast <512 x i1> %94 to <16 x i32>
%94 = call <64 x i1> @llvm.hexagon.V6.veqh.xor(<64 x i1> %91, <16 x i32> %92, <16 x i32> %93)
%95 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %94, i32 -1)
store volatile <16 x i32> %95, <16 x i32>* @Q6VecPredResult, align 64
%96 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%97 = bitcast <16 x i32> %96 to <512 x i1>
%97 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %96, i32 -1)
%98 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%99 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%100 = call <512 x i1> @llvm.hexagon.V6.veqw.xor(<512 x i1> %97, <16 x i32> %98, <16 x i32> %99)
%101 = bitcast <512 x i1> %100 to <16 x i32>
%100 = call <64 x i1> @llvm.hexagon.V6.veqw.xor(<64 x i1> %97, <16 x i32> %98, <16 x i32> %99)
%101 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %100, i32 -1)
store volatile <16 x i32> %101, <16 x i32>* @Q6VecPredResult, align 64
%102 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%103 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%104 = call <512 x i1> @llvm.hexagon.V6.vgtb(<16 x i32> %102, <16 x i32> %103)
%105 = bitcast <512 x i1> %104 to <16 x i32>
%104 = call <64 x i1> @llvm.hexagon.V6.vgtb(<16 x i32> %102, <16 x i32> %103)
%105 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %104, i32 -1)
store volatile <16 x i32> %105, <16 x i32>* @Q6VecPredResult, align 64
%106 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%107 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%108 = call <512 x i1> @llvm.hexagon.V6.vgth(<16 x i32> %106, <16 x i32> %107)
%109 = bitcast <512 x i1> %108 to <16 x i32>
%108 = call <64 x i1> @llvm.hexagon.V6.vgth(<16 x i32> %106, <16 x i32> %107)
%109 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %108, i32 -1)
store volatile <16 x i32> %109, <16 x i32>* @Q6VecPredResult, align 64
%110 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%111 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%112 = call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %110, <16 x i32> %111)
%113 = bitcast <512 x i1> %112 to <16 x i32>
%112 = call <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %110, <16 x i32> %111)
%113 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %112, i32 -1)
store volatile <16 x i32> %113, <16 x i32>* @Q6VecPredResult, align 64
%114 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%115 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%116 = call <512 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32> %114, <16 x i32> %115)
%117 = bitcast <512 x i1> %116 to <16 x i32>
%116 = call <64 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32> %114, <16 x i32> %115)
%117 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %116, i32 -1)
store volatile <16 x i32> %117, <16 x i32>* @Q6VecPredResult, align 64
%118 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%119 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%120 = call <512 x i1> @llvm.hexagon.V6.vgtuw(<16 x i32> %118, <16 x i32> %119)
%121 = bitcast <512 x i1> %120 to <16 x i32>
%120 = call <64 x i1> @llvm.hexagon.V6.vgtuw(<16 x i32> %118, <16 x i32> %119)
%121 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %120, i32 -1)
store volatile <16 x i32> %121, <16 x i32>* @Q6VecPredResult, align 64
%122 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%123 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%124 = call <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %122, <16 x i32> %123)
%125 = bitcast <512 x i1> %124 to <16 x i32>
%124 = call <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %122, <16 x i32> %123)
%125 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %124, i32 -1)
store volatile <16 x i32> %125, <16 x i32>* @Q6VecPredResult, align 64
%126 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%127 = bitcast <16 x i32> %126 to <512 x i1>
%127 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %126, i32 -1)
%128 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%129 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%130 = call <512 x i1> @llvm.hexagon.V6.vgtb.and(<512 x i1> %127, <16 x i32> %128, <16 x i32> %129)
%131 = bitcast <512 x i1> %130 to <16 x i32>
%130 = call <64 x i1> @llvm.hexagon.V6.vgtb.and(<64 x i1> %127, <16 x i32> %128, <16 x i32> %129)
%131 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %130, i32 -1)
store volatile <16 x i32> %131, <16 x i32>* @Q6VecPredResult, align 64
%132 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%133 = bitcast <16 x i32> %132 to <512 x i1>
%133 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %132, i32 -1)
%134 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%135 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%136 = call <512 x i1> @llvm.hexagon.V6.vgth.and(<512 x i1> %133, <16 x i32> %134, <16 x i32> %135)
%137 = bitcast <512 x i1> %136 to <16 x i32>
%136 = call <64 x i1> @llvm.hexagon.V6.vgth.and(<64 x i1> %133, <16 x i32> %134, <16 x i32> %135)
%137 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %136, i32 -1)
store volatile <16 x i32> %137, <16 x i32>* @Q6VecPredResult, align 64
%138 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%139 = bitcast <16 x i32> %138 to <512 x i1>
%139 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %138, i32 -1)
%140 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%141 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%142 = call <512 x i1> @llvm.hexagon.V6.vgtub.and(<512 x i1> %139, <16 x i32> %140, <16 x i32> %141)
%143 = bitcast <512 x i1> %142 to <16 x i32>
%142 = call <64 x i1> @llvm.hexagon.V6.vgtub.and(<64 x i1> %139, <16 x i32> %140, <16 x i32> %141)
%143 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %142, i32 -1)
store volatile <16 x i32> %143, <16 x i32>* @Q6VecPredResult, align 64
%144 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%145 = bitcast <16 x i32> %144 to <512 x i1>
%145 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %144, i32 -1)
%146 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%147 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%148 = call <512 x i1> @llvm.hexagon.V6.vgtuh.and(<512 x i1> %145, <16 x i32> %146, <16 x i32> %147)
%149 = bitcast <512 x i1> %148 to <16 x i32>
%148 = call <64 x i1> @llvm.hexagon.V6.vgtuh.and(<64 x i1> %145, <16 x i32> %146, <16 x i32> %147)
%149 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %148, i32 -1)
store volatile <16 x i32> %149, <16 x i32>* @Q6VecPredResult, align 64
%150 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%151 = bitcast <16 x i32> %150 to <512 x i1>
%151 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %150, i32 -1)
%152 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%153 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%154 = call <512 x i1> @llvm.hexagon.V6.vgtuw.and(<512 x i1> %151, <16 x i32> %152, <16 x i32> %153)
%155 = bitcast <512 x i1> %154 to <16 x i32>
%154 = call <64 x i1> @llvm.hexagon.V6.vgtuw.and(<64 x i1> %151, <16 x i32> %152, <16 x i32> %153)
%155 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %154, i32 -1)
store volatile <16 x i32> %155, <16 x i32>* @Q6VecPredResult, align 64
%156 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%157 = bitcast <16 x i32> %156 to <512 x i1>
%157 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %156, i32 -1)
%158 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%159 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%160 = call <512 x i1> @llvm.hexagon.V6.vgtw.and(<512 x i1> %157, <16 x i32> %158, <16 x i32> %159)
%161 = bitcast <512 x i1> %160 to <16 x i32>
%160 = call <64 x i1> @llvm.hexagon.V6.vgtw.and(<64 x i1> %157, <16 x i32> %158, <16 x i32> %159)
%161 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %160, i32 -1)
store volatile <16 x i32> %161, <16 x i32>* @Q6VecPredResult, align 64
%162 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%163 = bitcast <16 x i32> %162 to <512 x i1>
%163 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %162, i32 -1)
%164 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%165 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%166 = call <512 x i1> @llvm.hexagon.V6.vgtb.or(<512 x i1> %163, <16 x i32> %164, <16 x i32> %165)
%167 = bitcast <512 x i1> %166 to <16 x i32>
%166 = call <64 x i1> @llvm.hexagon.V6.vgtb.or(<64 x i1> %163, <16 x i32> %164, <16 x i32> %165)
%167 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %166, i32 -1)
store volatile <16 x i32> %167, <16 x i32>* @Q6VecPredResult, align 64
%168 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%169 = bitcast <16 x i32> %168 to <512 x i1>
%169 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %168, i32 -1)
%170 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%171 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%172 = call <512 x i1> @llvm.hexagon.V6.vgth.or(<512 x i1> %169, <16 x i32> %170, <16 x i32> %171)
%173 = bitcast <512 x i1> %172 to <16 x i32>
%172 = call <64 x i1> @llvm.hexagon.V6.vgth.or(<64 x i1> %169, <16 x i32> %170, <16 x i32> %171)
%173 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %172, i32 -1)
store volatile <16 x i32> %173, <16 x i32>* @Q6VecPredResult, align 64
%174 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%175 = bitcast <16 x i32> %174 to <512 x i1>
%175 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %174, i32 -1)
%176 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%177 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%178 = call <512 x i1> @llvm.hexagon.V6.vgtub.or(<512 x i1> %175, <16 x i32> %176, <16 x i32> %177)
%179 = bitcast <512 x i1> %178 to <16 x i32>
%178 = call <64 x i1> @llvm.hexagon.V6.vgtub.or(<64 x i1> %175, <16 x i32> %176, <16 x i32> %177)
%179 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %178, i32 -1)
store volatile <16 x i32> %179, <16 x i32>* @Q6VecPredResult, align 64
%180 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%181 = bitcast <16 x i32> %180 to <512 x i1>
%181 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %180, i32 -1)
%182 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%183 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%184 = call <512 x i1> @llvm.hexagon.V6.vgtuh.or(<512 x i1> %181, <16 x i32> %182, <16 x i32> %183)
%185 = bitcast <512 x i1> %184 to <16 x i32>
%184 = call <64 x i1> @llvm.hexagon.V6.vgtuh.or(<64 x i1> %181, <16 x i32> %182, <16 x i32> %183)
%185 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %184, i32 -1)
store volatile <16 x i32> %185, <16 x i32>* @Q6VecPredResult, align 64
%186 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%187 = bitcast <16 x i32> %186 to <512 x i1>
%187 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %186, i32 -1)
%188 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%189 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%190 = call <512 x i1> @llvm.hexagon.V6.vgtuw.or(<512 x i1> %187, <16 x i32> %188, <16 x i32> %189)
%191 = bitcast <512 x i1> %190 to <16 x i32>
%190 = call <64 x i1> @llvm.hexagon.V6.vgtuw.or(<64 x i1> %187, <16 x i32> %188, <16 x i32> %189)
%191 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %190, i32 -1)
store volatile <16 x i32> %191, <16 x i32>* @Q6VecPredResult, align 64
%192 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%193 = bitcast <16 x i32> %192 to <512 x i1>
%193 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %192, i32 -1)
%194 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%195 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%196 = call <512 x i1> @llvm.hexagon.V6.vgtw.or(<512 x i1> %193, <16 x i32> %194, <16 x i32> %195)
%197 = bitcast <512 x i1> %196 to <16 x i32>
%196 = call <64 x i1> @llvm.hexagon.V6.vgtw.or(<64 x i1> %193, <16 x i32> %194, <16 x i32> %195)
%197 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %196, i32 -1)
store volatile <16 x i32> %197, <16 x i32>* @Q6VecPredResult, align 64
%198 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%199 = bitcast <16 x i32> %198 to <512 x i1>
%199 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %198, i32 -1)
%200 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%201 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%202 = call <512 x i1> @llvm.hexagon.V6.vgtb.xor(<512 x i1> %199, <16 x i32> %200, <16 x i32> %201)
%203 = bitcast <512 x i1> %202 to <16 x i32>
%202 = call <64 x i1> @llvm.hexagon.V6.vgtb.xor(<64 x i1> %199, <16 x i32> %200, <16 x i32> %201)
%203 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %202, i32 -1)
store volatile <16 x i32> %203, <16 x i32>* @Q6VecPredResult, align 64
%204 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%205 = bitcast <16 x i32> %204 to <512 x i1>
%205 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %204, i32 -1)
%206 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%207 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%208 = call <512 x i1> @llvm.hexagon.V6.vgth.xor(<512 x i1> %205, <16 x i32> %206, <16 x i32> %207)
%209 = bitcast <512 x i1> %208 to <16 x i32>
%208 = call <64 x i1> @llvm.hexagon.V6.vgth.xor(<64 x i1> %205, <16 x i32> %206, <16 x i32> %207)
%209 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %208, i32 -1)
store volatile <16 x i32> %209, <16 x i32>* @Q6VecPredResult, align 64
%210 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%211 = bitcast <16 x i32> %210 to <512 x i1>
%211 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %210, i32 -1)
%212 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%213 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%214 = call <512 x i1> @llvm.hexagon.V6.vgtub.xor(<512 x i1> %211, <16 x i32> %212, <16 x i32> %213)
%215 = bitcast <512 x i1> %214 to <16 x i32>
%214 = call <64 x i1> @llvm.hexagon.V6.vgtub.xor(<64 x i1> %211, <16 x i32> %212, <16 x i32> %213)
%215 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %214, i32 -1)
store volatile <16 x i32> %215, <16 x i32>* @Q6VecPredResult, align 64
%216 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%217 = bitcast <16 x i32> %216 to <512 x i1>
%217 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %216, i32 -1)
%218 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%219 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%220 = call <512 x i1> @llvm.hexagon.V6.vgtuh.xor(<512 x i1> %217, <16 x i32> %218, <16 x i32> %219)
%221 = bitcast <512 x i1> %220 to <16 x i32>
%220 = call <64 x i1> @llvm.hexagon.V6.vgtuh.xor(<64 x i1> %217, <16 x i32> %218, <16 x i32> %219)
%221 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %220, i32 -1)
store volatile <16 x i32> %221, <16 x i32>* @Q6VecPredResult, align 64
%222 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%223 = bitcast <16 x i32> %222 to <512 x i1>
%223 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %222, i32 -1)
%224 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%225 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%226 = call <512 x i1> @llvm.hexagon.V6.vgtuw.xor(<512 x i1> %223, <16 x i32> %224, <16 x i32> %225)
%227 = bitcast <512 x i1> %226 to <16 x i32>
%226 = call <64 x i1> @llvm.hexagon.V6.vgtuw.xor(<64 x i1> %223, <16 x i32> %224, <16 x i32> %225)
%227 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %226, i32 -1)
store volatile <16 x i32> %227, <16 x i32>* @Q6VecPredResult, align 64
%228 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%229 = bitcast <16 x i32> %228 to <512 x i1>
%229 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %228, i32 -1)
%230 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%231 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%232 = call <512 x i1> @llvm.hexagon.V6.vgtw.xor(<512 x i1> %229, <16 x i32> %230, <16 x i32> %231)
%233 = bitcast <512 x i1> %232 to <16 x i32>
%232 = call <64 x i1> @llvm.hexagon.V6.vgtw.xor(<64 x i1> %229, <16 x i32> %230, <16 x i32> %231)
%233 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %232, i32 -1)
store volatile <16 x i32> %233, <16 x i32>* @Q6VecPredResult, align 64
%234 = call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 1)
%235 = bitcast <512 x i1> %234 to <16 x i32>
%234 = call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 1)
%235 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %234, i32 -1)
store volatile <16 x i32> %235, <16 x i32>* @Q6VecPredResult, align 64
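; pred.scalar2 materializes a <64 x i1> predicate directly from a scalar, so
; only the store path needs a vandqrt conversion here.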
%236 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%237 = bitcast <16 x i32> %236 to <512 x i1>
%237 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %236, i32 -1)
%238 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
%239 = bitcast <16 x i32> %238 to <512 x i1>
%240 = call <512 x i1> @llvm.hexagon.V6.pred.xor(<512 x i1> %237, <512 x i1> %239)
%241 = bitcast <512 x i1> %240 to <16 x i32>
%239 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %238, i32 -1)
%240 = call <64 x i1> @llvm.hexagon.V6.pred.xor(<64 x i1> %237, <64 x i1> %239)
%241 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %240, i32 -1)
store volatile <16 x i32> %241, <16 x i32>* @Q6VecPredResult, align 64
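; For predicate-to-predicate operations such as pred.xor, each <16 x i32>
; operand is converted to a <64 x i1> predicate independently before the
; operation, and only the final result goes back through vandqrt.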
%242 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%243 = call <16 x i32> @llvm.hexagon.V6.vassign(<16 x i32> %242)
@ -676,8 +676,8 @@ entry:
%253 = call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %251, <16 x i32> %252, i32 -1)
store volatile <16 x i32> %253, <16 x i32>* @VectorResult, align 64
%254 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%255 = bitcast <16 x i32> %254 to <512 x i1>
%256 = call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %255, i32 -1)
%255 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %254, i32 -1)
%256 = call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %255, i32 -1)
store volatile <16 x i32> %256, <16 x i32>* @VectorResult, align 64
%257 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%258 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
@ -685,8 +685,8 @@ entry:
store volatile <16 x i32> %259, <16 x i32>* @VectorResult, align 64
%260 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%261 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%262 = bitcast <16 x i32> %261 to <512 x i1>
%263 = call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %260, <512 x i1> %262, i32 -1)
%262 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %261, i32 -1)
%263 = call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %260, <64 x i1> %262, i32 -1)
store volatile <16 x i32> %263, <16 x i32>* @VectorResult, align 64
%264 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%265 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
@ -701,10 +701,10 @@ entry:
%272 = call <16 x i32> @llvm.hexagon.V6.vlalignb(<16 x i32> %270, <16 x i32> %271, i32 -1)
store volatile <16 x i32> %272, <16 x i32>* @VectorResult, align 64
%273 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%274 = bitcast <16 x i32> %273 to <512 x i1>
%274 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %273, i32 -1)
%275 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%276 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%277 = call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %274, <16 x i32> %275, <16 x i32> %276)
%277 = call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %274, <16 x i32> %275, <16 x i32> %276)
store volatile <16 x i32> %277, <16 x i32>* @VectorResult, align 64
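; vmux consumes the predicate directly and produces a plain vector, so no
; vandqrt is needed on its result; only the predicate input is converted.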
%278 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%279 = call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %278)
@ -729,28 +729,28 @@ entry:
%292 = call <16 x i32> @llvm.hexagon.V6.vd0()
store volatile <16 x i32> %292, <16 x i32>* @VectorResult, align 64
%293 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%294 = bitcast <16 x i32> %293 to <512 x i1>
%294 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %293, i32 -1)
%295 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%296 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%297 = call <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1> %294, <16 x i32> %295, <16 x i32> %296)
%297 = call <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1> %294, <16 x i32> %295, <16 x i32> %296)
store volatile <16 x i32> %297, <16 x i32>* @VectorResult, align 64
%298 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%299 = bitcast <16 x i32> %298 to <512 x i1>
%299 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %298, i32 -1)
%300 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%301 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%302 = call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %299, <16 x i32> %300, <16 x i32> %301)
%302 = call <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1> %299, <16 x i32> %300, <16 x i32> %301)
store volatile <16 x i32> %302, <16 x i32>* @VectorResult, align 64
%303 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%304 = bitcast <16 x i32> %303 to <512 x i1>
%304 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %303, i32 -1)
%305 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%306 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%307 = call <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1> %304, <16 x i32> %305, <16 x i32> %306)
%307 = call <16 x i32> @llvm.hexagon.V6.vsubbq(<64 x i1> %304, <16 x i32> %305, <16 x i32> %306)
store volatile <16 x i32> %307, <16 x i32>* @VectorResult, align 64
%308 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%309 = bitcast <16 x i32> %308 to <512 x i1>
%309 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %308, i32 -1)
%310 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%311 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%312 = call <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1> %309, <16 x i32> %310, <16 x i32> %311)
%312 = call <16 x i32> @llvm.hexagon.V6.vsubbnq(<64 x i1> %309, <16 x i32> %310, <16 x i32> %311)
store volatile <16 x i32> %312, <16 x i32>* @VectorResult, align 64
%313 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%314 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
@ -812,28 +812,28 @@ entry:
%356 = call <16 x i32> @llvm.hexagon.V6.vsubb(<16 x i32> %354, <16 x i32> %355)
store volatile <16 x i32> %356, <16 x i32>* @VectorResult, align 64
%357 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%358 = bitcast <16 x i32> %357 to <512 x i1>
%358 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %357, i32 -1)
%359 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%360 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%361 = call <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1> %358, <16 x i32> %359, <16 x i32> %360)
%361 = call <16 x i32> @llvm.hexagon.V6.vaddhq(<64 x i1> %358, <16 x i32> %359, <16 x i32> %360)
store volatile <16 x i32> %361, <16 x i32>* @VectorResult, align 64
%362 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%363 = bitcast <16 x i32> %362 to <512 x i1>
%363 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %362, i32 -1)
%364 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%365 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%366 = call <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1> %363, <16 x i32> %364, <16 x i32> %365)
%366 = call <16 x i32> @llvm.hexagon.V6.vaddhnq(<64 x i1> %363, <16 x i32> %364, <16 x i32> %365)
store volatile <16 x i32> %366, <16 x i32>* @VectorResult, align 64
%367 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%368 = bitcast <16 x i32> %367 to <512 x i1>
%368 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %367, i32 -1)
%369 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%370 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%371 = call <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1> %368, <16 x i32> %369, <16 x i32> %370)
%371 = call <16 x i32> @llvm.hexagon.V6.vsubhq(<64 x i1> %368, <16 x i32> %369, <16 x i32> %370)
store volatile <16 x i32> %371, <16 x i32>* @VectorResult, align 64
%372 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%373 = bitcast <16 x i32> %372 to <512 x i1>
%373 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %372, i32 -1)
%374 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%375 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%376 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1> %373, <16 x i32> %374, <16 x i32> %375)
%376 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1> %373, <16 x i32> %374, <16 x i32> %375)
store volatile <16 x i32> %376, <16 x i32>* @VectorResult, align 64
%377 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%378 = call <16 x i32> @llvm.hexagon.V6.vabsh(<16 x i32> %377)
@ -1105,28 +1105,28 @@ entry:
%574 = call <16 x i32> @llvm.hexagon.V6.vrmpyubv.acc(<16 x i32> %571, <16 x i32> %572, <16 x i32> %573)
store volatile <16 x i32> %574, <16 x i32>* @VectorResult, align 64
%575 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%576 = bitcast <16 x i32> %575 to <512 x i1>
%576 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %575, i32 -1)
%577 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%578 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%579 = call <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1> %576, <16 x i32> %577, <16 x i32> %578)
%579 = call <16 x i32> @llvm.hexagon.V6.vaddwq(<64 x i1> %576, <16 x i32> %577, <16 x i32> %578)
store volatile <16 x i32> %579, <16 x i32>* @VectorResult, align 64
%580 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%581 = bitcast <16 x i32> %580 to <512 x i1>
%581 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %580, i32 -1)
%582 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%583 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%584 = call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> %581, <16 x i32> %582, <16 x i32> %583)
%584 = call <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1> %581, <16 x i32> %582, <16 x i32> %583)
store volatile <16 x i32> %584, <16 x i32>* @VectorResult, align 64
%585 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%586 = bitcast <16 x i32> %585 to <512 x i1>
%586 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %585, i32 -1)
%587 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%588 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%589 = call <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1> %586, <16 x i32> %587, <16 x i32> %588)
%589 = call <16 x i32> @llvm.hexagon.V6.vsubwq(<64 x i1> %586, <16 x i32> %587, <16 x i32> %588)
store volatile <16 x i32> %589, <16 x i32>* @VectorResult, align 64
%590 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%591 = bitcast <16 x i32> %590 to <512 x i1>
%591 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %590, i32 -1)
%592 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%593 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%594 = call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> %591, <16 x i32> %592, <16 x i32> %593)
%594 = call <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1> %591, <16 x i32> %592, <16 x i32> %593)
store volatile <16 x i32> %594, <16 x i32>* @VectorResult, align 64
%595 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%596 = call <16 x i32> @llvm.hexagon.V6.vabsw(<16 x i32> %595)
@ -1359,10 +1359,10 @@ entry:
%764 = call <32 x i32> @llvm.hexagon.V6.vshuffvdd(<16 x i32> %762, <16 x i32> %763, i32 1)
store volatile <32 x i32> %764, <32 x i32>* @VectorPairResult, align 128
%765 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%766 = bitcast <16 x i32> %765 to <512 x i1>
%766 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %765, i32 -1)
%767 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
%768 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
%769 = call <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1> %766, <16 x i32> %767, <16 x i32> %768)
%769 = call <32 x i32> @llvm.hexagon.V6.vswap(<64 x i1> %766, <16 x i32> %767, <16 x i32> %768)
store volatile <32 x i32> %769, <32 x i32>* @VectorPairResult, align 128
%770 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
%771 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
@ -1664,139 +1664,139 @@ entry:
}
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #1
declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1>, <512 x i1>) #1
declare <64 x i1> @llvm.hexagon.V6.pred.and.n(<64 x i1>, <64 x i1>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1>) #1
declare <64 x i1> @llvm.hexagon.V6.pred.not(<64 x i1>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1>, <512 x i1>) #1
declare <64 x i1> @llvm.hexagon.V6.pred.or(<64 x i1>, <64 x i1>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.or.n(<512 x i1>, <512 x i1>) #1
declare <64 x i1> @llvm.hexagon.V6.pred.or.n(<64 x i1>, <64 x i1>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1>, <16 x i32>, i32) #1
declare <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1>, <16 x i32>, i32) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.veqb(<16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.veqb(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.veqh(<16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.veqh(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.veqw(<16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.veqw(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.veqb.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.veqb.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.veqh.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.veqh.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.veqw.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.veqw.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.veqb.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.veqb.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.veqh.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.veqh.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.veqw.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.veqw.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.veqb.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.veqb.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.veqh.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.veqh.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.veqw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.veqw.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtb(<16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtb(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgth(<16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgth(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtuw(<16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtuw(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtb.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtb.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgth.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgth.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtub.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtub.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtuh.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtuh.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtuw.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtuw.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtw.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtw.and(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtb.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtb.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgth.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgth.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtub.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtub.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtuh.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtuh.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtuw.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtuw.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtw.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtw.or(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtb.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtb.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgth.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgth.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtub.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtub.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtuh.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtuh.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtuw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtuw.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vgtw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <64 x i1> @llvm.hexagon.V6.vgtw.xor(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.xor(<512 x i1>, <512 x i1>) #1
declare <64 x i1> @llvm.hexagon.V6.pred.xor(<64 x i1>, <64 x i1>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vassign(<16 x i32>) #1
@ -1814,13 +1814,13 @@ declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32>, <16 x i32>, i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vand(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <64 x i1>, i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vdelta(<16 x i32>, <16 x i32>) #1
@ -1832,7 +1832,7 @@ declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vlalignb(<16 x i32>, <16 x i32>, i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32>) #1
@ -1856,16 +1856,16 @@ declare <16 x i32> @llvm.hexagon.V6.vxor(<16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vd0() #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddbq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubbq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddb(<16 x i32>, <16 x i32>) #1
@ -1913,16 +1913,16 @@ declare <16 x i32> @llvm.hexagon.V6.vshuffob(<16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubb(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddhq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubhq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vabsh(<16 x i32>) #1
@ -2138,16 +2138,16 @@ declare <16 x i32> @llvm.hexagon.V6.vrmpyub.acc(<16 x i32>, <16 x i32>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vrmpyubv.acc(<16 x i32>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddwq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubwq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vabsw(<16 x i32>) #1
@ -2318,7 +2318,7 @@ declare <32 x i32> @llvm.hexagon.V6.vdealvdd(<16 x i32>, <16 x i32>, i32) #1
declare <32 x i32> @llvm.hexagon.V6.vshuffvdd(<16 x i32>, <16 x i32>, i32) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vswap(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vaddb.dv(<32 x i32>, <32 x i32>) #1

View File

@ -60,10 +60,10 @@ b1: ; preds = %b3, %b0
b2: ; preds = %b1
%v34 = load <16 x i32>, <16 x i32>* %v11, align 64
%v35 = bitcast <16 x i32> %v34 to <512 x i1>
%v35 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v34, i32 -1)
%v36 = load <16 x i32>, <16 x i32>* %v14, align 64
%v37 = load <16 x i32>, <16 x i32>* %v15, align 64
%v38 = call <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1> %v35, <16 x i32> %v36, <16 x i32> %v37)
%v38 = call <32 x i32> @llvm.hexagon.V6.vswap(<64 x i1> %v35, <16 x i32> %v36, <16 x i32> %v37)
store <32 x i32> %v38, <32 x i32>* %v13, align 128
%v39 = load <32 x i32>, <32 x i32>* %v13, align 128
%v40 = call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v39)
@ -89,7 +89,7 @@ b4: ; preds = %b1
}
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vswap(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) #1
@ -100,5 +100,8 @@ declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vdealvdd(<16 x i32>, <16 x i32>, i32) #1
; Function Attrs: nounwind readnone
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
attributes #1 = { nounwind readnone }

View File

@ -24,28 +24,34 @@ entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval, align 4
%0 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%1 = bitcast <16 x i32> %0 to <512 x i1>
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %0, i32 -1)
%2 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
%3 = bitcast <16 x i32> %2 to <512 x i1>
%4 = call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %1, <512 x i1> %3)
%5 = bitcast <512 x i1> %4 to <16 x i32>
%3 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %2, i32 -1)
%4 = call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %1, <64 x i1> %3)
%5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %4, i32 -1)
store volatile <16 x i32> %5, <16 x i32>* @Q6VecPredResult, align 64
%6 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
%7 = bitcast <16 x i32> %6 to <512 x i1>
%7 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %6, i32 -1)
%8 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
%9 = bitcast <16 x i32> %8 to <512 x i1>
%10 = call <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1> %7, <512 x i1> %9)
%11 = bitcast <512 x i1> %10 to <16 x i32>
%9 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %8, i32 -1)
%10 = call <64 x i1> @llvm.hexagon.V6.pred.and.n(<64 x i1> %7, <64 x i1> %9)
%11 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %10, i32 -1)
store volatile <16 x i32> %11, <16 x i32>* @Q6VecPredResult, align 64
ret i32 0
}
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #1
declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1>, <512 x i1>) #1
declare <64 x i1> @llvm.hexagon.V6.pred.and.n(<64 x i1>, <64 x i1>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
; Function Attrs: nounwind readnone
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }

View File

@ -12,8 +12,9 @@ b0:
store i32 %a0, i32* %v0, align 4
store <16 x i32> %a1, <16 x i32>* %v1, align 64
%v3 = load i32, i32* %v0, align 4
%v4 = load <16 x i32>, <16 x i32>* %v2, align 64
call void asm sideeffect " $1 = vsetq2($0);\0A", "r,q"(i32 %v3, <16 x i32> %v4) #1
%v4 = tail call <64 x i1> asm sideeffect " $0 = vsetq2($1);\0A", "=q,r"(i32 %v3) #1
%v5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v4, i32 -1)
store <16 x i32> %v5, <16 x i32>* %v2, align 64
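; The "=q" output constraint makes the inline asm define the <64 x i1>
; predicate directly in a predicate register; vandqrt then turns it into a
; storable <16 x i32> value, since predicates cannot be stored directly.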
ret void
}
@ -23,5 +24,7 @@ b0:
ret i32 0
}
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
attributes #0 = { nounwind "target-cpu"="hexagonv62" "target-features"="+hvxv62,+hvx-length64b" }
attributes #1 = { nounwind }
attributes #1 = { nounwind readnone }

View File

@ -17,66 +17,66 @@ b1: ; preds = %b1, %b0
%v5 = phi i32 [ %v77, %b1 ], [ 0, %b0 ]
%v6 = phi <32 x i32>* [ undef, %b1 ], [ undef, %b0 ]
%v7 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> undef, <32 x i32> undef)
%v8 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v7, <32 x i32> zeroinitializer)
%v9 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v8, <32 x i32> undef, <32 x i32> %v0)
%v8 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v7, <32 x i32> zeroinitializer)
%v9 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v8, <32 x i32> undef, <32 x i32> %v0)
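; In 128-byte HVX mode the predicate type is <128 x i1>, produced and consumed
; here by the .128B intrinsic variants operating on <32 x i32> vectors.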
%v10 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> undef, <32 x i32> undef, i32 3)
%v11 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> zeroinitializer, <32 x i32> undef)
%v12 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v10, <32 x i32> undef)
%v13 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v11, <32 x i32> zeroinitializer)
%v14 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v12, <32 x i32> zeroinitializer)
%v15 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v13, <32 x i32> %v9, <32 x i32> %v0)
%v16 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v14, <32 x i32> %v15, <32 x i32> %v0)
%v17 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v16, <32 x i32> %v0)
%v18 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v17, <32 x i32> %v0)
%v19 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
%v20 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v19, <32 x i32> %v18, <32 x i32> %v0)
%v21 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
%v22 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> undef, <32 x i32> undef, <32 x i32> undef)
%v23 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %v21, <32 x i32> undef, <32 x i32> undef)
%v13 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v11, <32 x i32> zeroinitializer)
%v14 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v12, <32 x i32> zeroinitializer)
%v15 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v13, <32 x i32> %v9, <32 x i32> %v0)
%v16 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v14, <32 x i32> %v15, <32 x i32> %v0)
%v17 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v16, <32 x i32> %v0)
%v18 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v17, <32 x i32> %v0)
%v19 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
%v20 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v19, <32 x i32> %v18, <32 x i32> %v0)
%v21 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
%v22 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> undef, <32 x i32> undef, <32 x i32> undef)
%v23 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %v21, <32 x i32> undef, <32 x i32> undef)
%v24 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v23, <32 x i32> %v22)
%v25 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> zeroinitializer, <64 x i32> %v24, i32 16843009)
%v26 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v20, <32 x i32> %v0)
%v27 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v26, <32 x i32> %v0)
%v28 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v27, <32 x i32> %v0)
%v29 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v21, <32 x i32> %v28, <32 x i32> %v0)
%v30 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
%v31 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> undef, <32 x i32> undef, <32 x i32> zeroinitializer)
%v26 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v20, <32 x i32> %v0)
%v27 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v26, <32 x i32> %v0)
%v28 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v27, <32 x i32> %v0)
%v29 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v21, <32 x i32> %v28, <32 x i32> %v0)
%v30 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
%v31 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> undef, <32 x i32> undef, <32 x i32> zeroinitializer)
%v32 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v31, <32 x i32> undef)
%v33 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> %v25, <64 x i32> %v32, i32 16843009)
%v34 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> %v33, <64 x i32> undef, i32 16843009)
%v35 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v29, <32 x i32> %v0)
%v36 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v35, <32 x i32> %v0)
%v37 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v36, <32 x i32> %v0)
%v38 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v30, <32 x i32> %v37, <32 x i32> %v0)
%v35 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v29, <32 x i32> %v0)
%v36 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v35, <32 x i32> %v0)
%v37 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v36, <32 x i32> %v0)
%v38 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v30, <32 x i32> %v37, <32 x i32> %v0)
%v39 = load <32 x i32>, <32 x i32>* null, align 128, !tbaa !0
%v40 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
%v41 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %v40, <32 x i32> undef, <32 x i32> %v39)
%v40 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
%v41 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %v40, <32 x i32> undef, <32 x i32> %v39)
%v42 = tail call <64 x i32> @llvm.hexagon.V6.vmpybus.acc.128B(<64 x i32> %v34, <32 x i32> %v41, i32 16843009)
%v43 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v40, <32 x i32> %v38, <32 x i32> %v0)
%v43 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v40, <32 x i32> %v38, <32 x i32> %v0)
%v44 = tail call <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32> %v39, <32 x i32> undef, i32 1)
%v45 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> undef, <32 x i32> %v39, i32 1)
%v46 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> undef, <32 x i32> %v39, i32 2)
%v47 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v44, <32 x i32> undef)
%v48 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v45, <32 x i32> undef)
%v49 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v46, <32 x i32> undef)
%v50 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v47, <32 x i32> zeroinitializer)
%v51 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v48, <32 x i32> zeroinitializer)
%v52 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v49, <32 x i32> zeroinitializer)
%v53 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %v52, <32 x i32> undef, <32 x i32> %v46)
%v50 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v47, <32 x i32> zeroinitializer)
%v51 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v48, <32 x i32> zeroinitializer)
%v52 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v49, <32 x i32> zeroinitializer)
%v53 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1> %v52, <32 x i32> undef, <32 x i32> %v46)
%v54 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> %v42, <64 x i32> undef, i32 16843009)
%v55 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v53, <32 x i32> undef)
%v56 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> %v54, <64 x i32> %v55, i32 16843009)
%v57 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v50, <32 x i32> %v43, <32 x i32> %v0)
%v58 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v51, <32 x i32> %v57, <32 x i32> %v0)
%v59 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v58, <32 x i32> %v0)
%v60 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v52, <32 x i32> %v59, <32 x i32> %v0)
%v61 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
%v57 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v50, <32 x i32> %v43, <32 x i32> %v0)
%v58 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v51, <32 x i32> %v57, <32 x i32> %v0)
%v59 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v58, <32 x i32> %v0)
%v60 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v52, <32 x i32> %v59, <32 x i32> %v0)
%v61 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
%v62 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> %v56, <64 x i32> undef, i32 16843009)
%v63 = tail call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> %v62, <64 x i32> zeroinitializer, i32 16843009)
%v64 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v60, <32 x i32> %v0)
%v65 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v61, <32 x i32> %v64, <32 x i32> %v0)
%v66 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v65, <32 x i32> %v0)
%v67 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v66, <32 x i32> %v0)
%v64 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v60, <32 x i32> %v0)
%v65 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v61, <32 x i32> %v64, <32 x i32> %v0)
%v66 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v65, <32 x i32> %v0)
%v67 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v66, <32 x i32> %v0)
%v68 = tail call <64 x i32> @llvm.hexagon.V6.vlutvwh.oracc.128B(<64 x i32> undef, <32 x i32> %v67, <32 x i32> %v1, i32 3)
%v69 = tail call <64 x i32> @llvm.hexagon.V6.vlutvwh.oracc.128B(<64 x i32> %v68, <32 x i32> %v67, <32 x i32> %v2, i32 4)
%v70 = tail call <64 x i32> @llvm.hexagon.V6.vlutvwh.oracc.128B(<64 x i32> %v69, <32 x i32> %v67, <32 x i32> %v2, i32 5)
@ -108,16 +108,16 @@ declare <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
declare <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32>, <32 x i32>) #1
declare <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
declare <64 x i32> @llvm.hexagon.V6.vmpybus.acc.128B(<64 x i32>, <32 x i32>, i32) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32>, <32 x i32>, i32) #1

View File

@ -19,8 +19,8 @@ b0:
%v1 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 12)
store <16 x i32> %v1, <16 x i32>* @g2, align 64, !tbaa !0
%v2 = load <16 x i32>, <16 x i32>* @g0, align 64, !tbaa !0
%v3 = bitcast <16 x i32> %v2 to <512 x i1>
%v4 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v3, <16 x i32> %v0, <16 x i32> %v1)
%v3 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v2, i32 -1)
%v4 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v3, <16 x i32> %v0, <16 x i32> %v1)
store <16 x i32> %v4, <16 x i32>* @g3, align 64, !tbaa !0
ret i32 0
}
@ -29,7 +29,10 @@ b0:
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
attributes #1 = { nounwind readnone }

View File

@ -20,61 +20,61 @@ b2: ; preds = %b2, %b1
%v4 = phi <32 x i32>* [ %v3, %b1 ], [ undef, %b2 ]
%v5 = tail call <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32> undef, <32 x i32> zeroinitializer, i32 2)
%v6 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v5, <32 x i32> zeroinitializer)
%v7 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer)
%v8 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v7, <32 x i32> zeroinitializer)
%v9 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> zeroinitializer, <32 x i32> %v8, <32 x i32> zeroinitializer)
%v10 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v9, <32 x i32> zeroinitializer)
%v7 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer)
%v8 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v7, <32 x i32> zeroinitializer)
%v9 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> zeroinitializer, <32 x i32> %v8, <32 x i32> zeroinitializer)
%v10 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v9, <32 x i32> zeroinitializer)
%v11 = tail call <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32> undef, <32 x i32> zeroinitializer, i32 4)
%v12 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v11, <32 x i32> zeroinitializer)
%v13 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> zeroinitializer, <32 x i32> zeroinitializer)
%v14 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> zeroinitializer, <32 x i32> undef)
%v15 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v12, <32 x i32> undef)
%v16 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v13, <32 x i32> undef)
%v17 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v10, <32 x i32> zeroinitializer)
%v18 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v14, <32 x i32> %v17, <32 x i32> zeroinitializer)
%v19 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v15, <32 x i32> %v18, <32 x i32> zeroinitializer)
%v20 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v16, <32 x i32> %v19, <32 x i32> zeroinitializer)
%v14 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> zeroinitializer, <32 x i32> undef)
%v15 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v12, <32 x i32> undef)
%v16 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v13, <32 x i32> undef)
%v17 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v10, <32 x i32> zeroinitializer)
%v18 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v14, <32 x i32> %v17, <32 x i32> zeroinitializer)
%v19 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v15, <32 x i32> %v18, <32 x i32> zeroinitializer)
%v20 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v16, <32 x i32> %v19, <32 x i32> zeroinitializer)
%v21 = getelementptr inbounds i8, i8* null, i32 undef
%v22 = bitcast i8* %v21 to <32 x i32>*
%v23 = load <32 x i32>, <32 x i32>* %v22, align 128, !tbaa !0
%v24 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v23, <32 x i32> zeroinitializer)
%v25 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v24, <32 x i32> undef)
%v26 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v25, <32 x i32> %v20, <32 x i32> zeroinitializer)
%v27 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v26, <32 x i32> zeroinitializer)
%v28 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v27, <32 x i32> zeroinitializer)
%v29 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v28, <32 x i32> zeroinitializer)
%v30 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v29, <32 x i32> zeroinitializer)
%v31 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v30, <32 x i32> zeroinitializer)
%v32 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v31, <32 x i32> zeroinitializer)
%v33 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v32, <32 x i32> zeroinitializer)
%v34 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v33, <32 x i32> zeroinitializer)
%v35 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v34, <32 x i32> zeroinitializer)
%v25 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v24, <32 x i32> undef)
%v26 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v25, <32 x i32> %v20, <32 x i32> zeroinitializer)
%v27 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v26, <32 x i32> zeroinitializer)
%v28 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v27, <32 x i32> zeroinitializer)
%v29 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v28, <32 x i32> zeroinitializer)
%v30 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v29, <32 x i32> zeroinitializer)
%v31 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v30, <32 x i32> zeroinitializer)
%v32 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v31, <32 x i32> zeroinitializer)
%v33 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v32, <32 x i32> zeroinitializer)
%v34 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v33, <32 x i32> zeroinitializer)
%v35 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v34, <32 x i32> zeroinitializer)
%v36 = tail call <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32> undef, <32 x i32> undef, i32 1)
%v37 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> undef, <32 x i32> undef, i32 1)
%v38 = tail call <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32> undef, <32 x i32> undef, i32 2)
%v39 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v36, <32 x i32> zeroinitializer)
%v40 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v37, <32 x i32> zeroinitializer)
%v41 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v38, <32 x i32> zeroinitializer)
%v42 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v39, <32 x i32> undef)
%v43 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v40, <32 x i32> undef)
%v44 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v41, <32 x i32> undef)
%v45 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> undef)
%v46 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v42, <32 x i32> %v35, <32 x i32> zeroinitializer)
%v47 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v43, <32 x i32> %v46, <32 x i32> zeroinitializer)
%v48 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v44, <32 x i32> %v47, <32 x i32> zeroinitializer)
%v49 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v45, <32 x i32> %v48, <32 x i32> zeroinitializer)
%v42 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v39, <32 x i32> undef)
%v43 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v40, <32 x i32> undef)
%v44 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v41, <32 x i32> undef)
%v45 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> undef, <32 x i32> undef)
%v46 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v42, <32 x i32> %v35, <32 x i32> zeroinitializer)
%v47 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v43, <32 x i32> %v46, <32 x i32> zeroinitializer)
%v48 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v44, <32 x i32> %v47, <32 x i32> zeroinitializer)
%v49 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v45, <32 x i32> %v48, <32 x i32> zeroinitializer)
%v50 = tail call <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32> undef, <32 x i32> undef, i32 4)
%v51 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> undef, <32 x i32> undef, i32 4)
%v52 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> undef, <32 x i32> zeroinitializer)
%v53 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v50, <32 x i32> zeroinitializer)
%v54 = tail call <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32> %v51, <32 x i32> zeroinitializer)
%v55 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v52, <32 x i32> undef)
%v56 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v53, <32 x i32> undef)
%v57 = tail call <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v54, <32 x i32> undef)
%v58 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> undef, <32 x i32> %v49, <32 x i32> zeroinitializer)
%v59 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v55, <32 x i32> %v58, <32 x i32> zeroinitializer)
%v60 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v56, <32 x i32> %v59, <32 x i32> zeroinitializer)
%v61 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1> %v57, <32 x i32> %v60, <32 x i32> zeroinitializer)
%v55 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v52, <32 x i32> undef)
%v56 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v53, <32 x i32> undef)
%v57 = tail call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %v54, <32 x i32> undef)
%v58 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> undef, <32 x i32> %v49, <32 x i32> zeroinitializer)
%v59 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v55, <32 x i32> %v58, <32 x i32> zeroinitializer)
%v60 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v56, <32 x i32> %v59, <32 x i32> zeroinitializer)
%v61 = tail call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %v57, <32 x i32> %v60, <32 x i32> zeroinitializer)
%v62 = tail call <64 x i32> @llvm.hexagon.V6.vlutvwh.oracc.128B(<64 x i32> zeroinitializer, <32 x i32> %v61, <32 x i32> undef, i32 5)
%v63 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuhv.128B(<32 x i32> undef, <32 x i32> undef)
%v64 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v62)
@@ -100,10 +100,10 @@ declare <32 x i32> @llvm.hexagon.V6.vshuffh.128B(<32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vabsdiffub.128B(<32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
declare <1024 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32>, <32 x i32>) #1
declare <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1>, <32 x i32>, <32 x i32>) #1
; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vlalignbi.128B(<32 x i32>, <32 x i32>, i32) #1
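Note: with 128-byte vectors each byte lane maps to one predicate bit, so the byte compare vgtub.128B now produces <128 x i1> directly and vaddbnq.128B consumes it (the "nq" form adds only in lanes where the predicate is clear, if I read the ISA correctly). A minimal sketch of the compare-and-conditionally-accumulate pattern this test exercises, with placeholder values %a, %b, %acc:

    %q = call <128 x i1> @llvm.hexagon.V6.vgtub.128B(<32 x i32> %a, <32 x i32> %b)
    %acc.new = call <32 x i32> @llvm.hexagon.V6.vaddbnq.128B(<128 x i1> %q, <32 x i32> %acc, <32 x i32> %a)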

View File

@@ -30,20 +30,20 @@ entry:
%call1 = tail call i32 @acquire_vector_unit(i8 zeroext 0) #3
tail call void @init_vectors() #3
%0 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 2)
%1 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %0, i32 16843009)
%1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %0, i32 16843009)
%2 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
%3 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 -2147483648)
%4 = bitcast <512 x i1> %3 to <16 x i32>
%3 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1> %1, <16 x i32> %2, i32 -2147483648)
%4 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %3, i32 -1)
store <16 x i32> %4, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
%puts = tail call i32 @puts(i8* getelementptr inbounds ([106 x i8], [106 x i8]* @str, i32 0, i32 0))
tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
%5 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 -1)
%6 = bitcast <512 x i1> %5 to <16 x i32>
%5 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1> %1, <16 x i32> %2, i32 -1)
%6 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %5, i32 -1)
store <16 x i32> %6, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
%puts5 = tail call i32 @puts(i8* getelementptr inbounds ([99 x i8], [99 x i8]* @str3, i32 0, i32 0))
tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
%7 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %1, <16 x i32> %2, i32 0)
%8 = bitcast <512 x i1> %7 to <16 x i32>
%7 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1> %1, <16 x i32> %2, i32 0)
%8 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %7, i32 -1)
store <16 x i32> %8, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
%puts6 = tail call i32 @puts(i8* getelementptr inbounds ([98 x i8], [98 x i8]* @str4, i32 0, i32 0))
tail call void @print_vecpred(i32 512, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
@@ -57,10 +57,13 @@ declare i32 @acquire_vector_unit(i8 zeroext) #1
declare void @init_vectors() #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1>, <16 x i32>, i32) #2
declare <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1>, <16 x i32>, i32) #2
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #2
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #2
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #2
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #2
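Note: the accumulating conversion vandvrt.acc changes type the same way; its predicate input and result are now both <64 x i1>, and only the scalar mask varies between the three calls above (16843009 is 0x01010101, -2147483648 is 0x80000000). As I understand the instruction, the .acc form ORs the bits derived from the new vector into the incoming predicate rather than replacing it, roughly (placeholder names):

    %q0 = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 16843009)
    %q1 = call <64 x i1> @llvm.hexagon.V6.vandvrt.acc(<64 x i1> %q0, <16 x i32> %v1, i32 -2147483648)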

View File

@@ -11,19 +11,18 @@ target triple = "hexagon"
define i32 @f0() #0 {
b0:
%v0 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
%v1 = bitcast <16 x i32> %v0 to <512 x i1>
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -1)
%v2 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 2)
%v3 = bitcast <16 x i32> %v2 to <512 x i1>
%v4 = tail call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %v1, <512 x i1> %v3)
%v5 = bitcast <512 x i1> %v4 to <16 x i32>
%v3 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v2, i32 -1)
%v4 = tail call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %v1, <64 x i1> %v3)
%v5 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v4, i32 -1)
store <16 x i32> %v5, <16 x i32>* @g0, align 64, !tbaa !0
ret i32 0
}
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32)
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32)
declare <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1>, <64 x i1>) #1
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
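Note: operations that stay within the predicate domain, such as pred.and, keep both operands and the result in the new <64 x i1> type and need no conversion; conversions are only involved where a value crosses between a vector register and a predicate register. Sketched with placeholder predicates %qa and %qb:

    %qr = call <64 x i1> @llvm.hexagon.V6.pred.and(<64 x i1> %qa, <64 x i1> %qb)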

View File

@@ -34,7 +34,7 @@ b0:
%v11 = mul i32 %v10, %v9
%v12 = sub i32 %a1, %v11
%v13 = lshr i32 %v12, 1
%v14 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v13)
%v14 = tail call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v13)
%v15 = icmp eq i32 %a2, 0
br i1 %v15, label %b11, label %b1
@@ -132,7 +132,7 @@ b9: ; preds = %b8, %b7
%v80 = tail call <16 x i32> @llvm.hexagon.V6.vdmpybus.acc(<16 x i32> %v78, <16 x i32> %v76, i32 1077952576)
%v81 = tail call <16 x i32> @llvm.hexagon.V6.vpackob(<16 x i32> %v80, <16 x i32> %v79)
%v82 = load <16 x i32>, <16 x i32>* %v68, align 64, !tbaa !2
%v83 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v14, <16 x i32> %v81, <16 x i32> %v82)
%v83 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %v14, <16 x i32> %v81, <16 x i32> %v82)
store <16 x i32> %v83, <16 x i32>* %v68, align 64, !tbaa !2
br label %b10
@@ -157,7 +157,7 @@ declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
declare i32 @llvm.hexagon.S2.ct0(i32) #1
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
declare <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vdmpybus.acc(<16 x i32>, <16 x i32>, i32) #1
@@ -166,7 +166,7 @@ declare <16 x i32> @llvm.hexagon.V6.vdmpybus.acc(<16 x i32>, <16 x i32>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vpackob(<16 x i32>, <16 x i32>) #1
; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1>, <16 x i32>, <16 x i32>) #1
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
attributes #1 = { nounwind readnone }
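Note: pred.scalar2 materializes a vector predicate from a scalar (per my reading, it sets the low byte lanes up to the given count, as vsetq does), and vmux then selects per lane between two vectors. The pattern used above, with placeholder names %n, %a, %b:

    %q = call <64 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %n)
    %r = call <16 x i32> @llvm.hexagon.V6.vmux(<64 x i1> %q, <16 x i32> %a, <16 x i32> %b)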

View File

@@ -1,5 +1,4 @@
; RUN: llc -march=hexagon -mcpu=hexagonv60 -mattr=+hvxv60,hvx-length64b < %s \
; RUN: | FileCheck %s
; RUN: llc -march=hexagon < %s | FileCheck %s
; Check that the store to Q6VecPredResult does not get expanded into multiple
; stores. There should be no memd's. This relies on the alignment specified
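Note: the hunks shown omit the test's CHECK lines; given the comment above, they presumably assert something like the following, i.e. that the 64-byte-aligned store survives as a single HVX store and is never split into scalar doubleword stores:

    ; CHECK-NOT: memd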
@@ -11,25 +10,23 @@
@Q6VecPredResult = common global <16 x i32> zeroinitializer, align 64
; Function Attrs: nounwind
define i32 @foo() #0 {
entry:
%0 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
%1 = tail call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %0, i32 -2147483648)
store <512 x i1> %1, <512 x i1>* bitcast (<16 x i32>* @Q6VecPredResult to <512 x i1>*), align 64, !tbaa !1
%v0 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 1)
%v1 = tail call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %v0, i32 -2147483648)
%v2 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1> %v1, i32 -1)
store <16 x i32> %v2, <16 x i32>* @Q6VecPredResult, align 64, !tbaa !1
tail call void @print_vecpred(i32 64, i8* bitcast (<16 x i32>* @Q6VecPredResult to i8*)) #3
ret i32 0
}
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
; Function Attrs: nounwind readnone
declare <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.vandqrt(<64 x i1>, i32) #1
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
declare void @print_vecpred(i32, i8*) #2
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx" }
attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvxv66,+hvx-length64b" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind }
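Note: with the CPU and HVX configuration now carried on the function attributes ("target-cpu"="hexagonv66", "+hvxv66,+hvx-length64b"), the bare llc -march=hexagon RUN line suffices; spelling the same configuration on the command line would look roughly like this (equivalent in effect, as far as I can tell):

    ; RUN: llc -march=hexagon -mcpu=hexagonv66 -mattr=+hvxv66,+hvx-length64b < %s | FileCheck %s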

View File

@@ -12,7 +12,7 @@ for.body9.us:
%cmp10.us = icmp eq i32 0, undef
%.h63h32.2.us = select i1 %cmp10.us, <16 x i32> zeroinitializer, <16 x i32> undef
%0 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %.h63h32.2.us, <16 x i32> undef, i32 2)
%1 = tail call <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1> undef, <16 x i32> undef, <16 x i32> %0)
%1 = tail call <32 x i32> @llvm.hexagon.V6.vswap(<64 x i1> undef, <16 x i32> undef, <16 x i32> %0)
%2 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %1)
%3 = tail call <32 x i32> @llvm.hexagon.V6.vshuffvdd(<16 x i32> undef, <16 x i32> %2, i32 62)
%4 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %3)
@@ -24,7 +24,7 @@ for.body43.us.preheader: ; preds = %for.body9.us
}
declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32) #1
declare <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1>, <16 x i32>, <16 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vswap(<64 x i1>, <16 x i32>, <16 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) #1
declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>) #1
declare <32 x i32> @llvm.hexagon.V6.vshuffvdd(<16 x i32>, <16 x i32>, i32) #1
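Note: vswap is another consumer of the narrowed predicate type. As I understand the instruction, it exchanges the two input vectors in the lanes where the predicate is set and returns both results as a vector pair, which lo/hi then split. With placeholder names %q, %a, %b:

    %pair = call <32 x i32> @llvm.hexagon.V6.vswap(<64 x i1> %q, <16 x i32> %a, <16 x i32> %b)
    %x = call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %pair)
    %y = call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %pair)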