diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index 02d67084f7a5..4b24634297cb 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -141,13 +141,13 @@ class Inst <string n, string p, string t, Op o> { string Name = n; string Prototype = p; string Types = t; + string ArchGuard = ""; + Op Operand = o; bit isShift = 0; bit isScalarShift = 0; bit isScalarNarrowShift = 0; bit isVCVT_N = 0; - bit isA64 = 0; - bit isCrypto = 0; // For immediate checks: the immediate will be assumed to specify the lane of // a Q register. Only used for intrinsics which end up calling polymorphic // builtins. @@ -546,90 +546,66 @@ def VFMA : SInst<"vfma", "dddd", "fQf">; //////////////////////////////////////////////////////////////////////////////// // AArch64 Intrinsics -let isA64 = 1 in { +let ArchGuard = "defined(__aarch64__)" in { //////////////////////////////////////////////////////////////////////////////// // Load/Store -// With additional QUl, Ql, d, Qd, Pl, QPl type. -def LD1 : WInst<"vld1", "dc", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; -def LD2 : WInst<"vld2", "2c", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; -def LD3 : WInst<"vld3", "3c", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; -def LD4 : WInst<"vld4", "4c", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; -def ST1 : WInst<"vst1", "vpd", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; -def ST2 : WInst<"vst2", "vp2", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; -def ST3 : WInst<"vst3", "vp3", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; -def ST4 : WInst<"vst4", "vp4", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; +def LD1 : WInst<"vld1", "dc", "dQdPlQPl">; +def LD2 : WInst<"vld2", "2c", "QUlQldQdPlQPl">; +def LD3 : WInst<"vld3", "3c", "QUlQldQdPlQPl">; +def LD4 : WInst<"vld4", "4c", "QUlQldQdPlQPl">; +def ST1 : WInst<"vst1", "vpd", "dQdPlQPl">; +def ST2 : WInst<"vst2", "vp2", "QUlQldQdPlQPl">; +def ST3 : WInst<"vst3", "vp3", "QUlQldQdPlQPl">; +def ST4 : WInst<"vst4", "vp4", "QUlQldQdPlQPl">; def LD1_X2 : WInst<"vld1_x2", "2c", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; + "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">; def LD3_x3 : WInst<"vld1_x3", "3c", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; + "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">; def LD4_x4 : WInst<"vld1_x4", "4c", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; + "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">; def ST1_X2 : WInst<"vst1_x2", "vp2", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; + "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">; def ST1_X3 : WInst<"vst1_x3", "vp3", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; + "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">; def ST1_X4 : WInst<"vst1_x4", "vp4", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; + "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">; -// With additional QUl, Ql, d, Qd, Pl, QPl type. -def LD1_LANE : WInst<"vld1_lane", "dcdi", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; -def LD2_LANE : WInst<"vld2_lane", "2c2i", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; -def LD3_LANE : WInst<"vld3_lane", "3c3i", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; -def LD4_LANE : WInst<"vld4_lane", "4c4i", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; -def ST1_LANE : WInst<"vst1_lane", "vpdi", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; -def ST2_LANE : WInst<"vst2_lane", "vp2i", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; -def ST3_LANE : WInst<"vst3_lane", "vp3i", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; -def ST4_LANE : WInst<"vst4_lane", "vp4i", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; +def LD1_LANE : WInst<"vld1_lane", "dcdi", "dQdPlQPl">; +def LD2_LANE : WInst<"vld2_lane", "2c2i", "lUlQcQUcQPcQlQUldQdPlQPl">; +def LD3_LANE : WInst<"vld3_lane", "3c3i", "lUlQcQUcQPcQlQUldQdPlQPl">; +def LD4_LANE : WInst<"vld4_lane", "4c4i", "lUlQcQUcQPcQlQUldQdPlQPl">; +def ST1_LANE : WInst<"vst1_lane", "vpdi", "dQdPlQPl">; +def ST2_LANE : WInst<"vst2_lane", "vp2i", "lUlQcQUcQPcQlQUldQdPlQPl">; +def ST3_LANE : WInst<"vst3_lane", "vp3i", "lUlQcQUcQPcQlQUldQdPlQPl">; +def ST4_LANE : WInst<"vst4_lane", "vp4i", "lUlQcQUcQPcQlQUldQdPlQPl">; -def LD1_DUP : WInst<"vld1_dup", "dc", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; +def LD1_DUP : WInst<"vld1_dup", "dc", "dQdPlQPl">; def LD2_DUP : WInst<"vld2_dup", "2c", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; + "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPldPl">; def LD3_DUP : WInst<"vld3_dup", "3c", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; + "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPldPl">; def LD4_DUP : WInst<"vld4_dup", "4c", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; + "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPldPl">; def VLDRQ : WInst<"vldrq", "sc", "Pk">; def VSTRQ : WInst<"vstrq", "vps", "Pk">; //////////////////////////////////////////////////////////////////////////////// // Addition -// With additional d, Qd type. -def ADD : IOpInst<"vadd", "ddd", "csilfdUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd", - OP_ADD>; +def ADD : IOpInst<"vadd", "ddd", "dQd", OP_ADD>; //////////////////////////////////////////////////////////////////////////////// // Subtraction -// With additional Qd type. -def SUB : IOpInst<"vsub", "ddd", "csildfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd", - OP_SUB>; +def SUB : IOpInst<"vsub", "ddd", "dQd", OP_SUB>; //////////////////////////////////////////////////////////////////////////////// // Multiplication -// With additional Qd type. -def MUL : IOpInst<"vmul", "ddd", "csifdUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MUL>; -def MLA : IOpInst<"vmla", "dddd", "csifdUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLA>; -def MLS : IOpInst<"vmls", "dddd", "csifdUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLS>; +def MUL : IOpInst<"vmul", "ddd", "dQd", OP_MUL>; +def MLA : IOpInst<"vmla", "dddd", "dQd", OP_MLA>; +def MLS : IOpInst<"vmls", "dddd", "dQd", OP_MLS>; //////////////////////////////////////////////////////////////////////////////// // Multiplication Extended @@ -641,8 +617,7 @@ def FDIV : IOpInst<"vdiv", "ddd", "fdQfQd", OP_DIV>; //////////////////////////////////////////////////////////////////////////////// // Vector fused multiply-add operations -// With additional d, Qd type. -def FMLA : SInst<"vfma", "dddd", "fdQfQd">; +def FMLA : SInst<"vfma", "dddd", "dQd">; def FMLS : SInst<"vfms", "dddd", "fdQfQd">; //////////////////////////////////////////////////////////////////////////////// @@ -653,22 +628,18 @@ def FMLS_N : SOpInst<"vfms_n", "ddds", "fQf", OP_FMLS_N>; //////////////////////////////////////////////////////////////////////////////// // Logical operations -// With additional Qd, Ql, QPl type. -def BSL : SInst<"vbsl", "dudd", - "csilUcUsUiUlfdPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPsQdPlQPl">; +def BSL : SInst<"vbsl", "dudd", "dPlQdQPl">; //////////////////////////////////////////////////////////////////////////////// // Absolute Difference -// With additional Qd type. -def ABD : SInst<"vabd", "ddd", "csiUcUsUifdQcQsQiQUcQUsQUiQfQd">; +def ABD : SInst<"vabd", "ddd", "dQd">; //////////////////////////////////////////////////////////////////////////////// // saturating absolute/negate -// With additional Qd/Ql type. -def ABS : SInst<"vabs", "dd", "csilfdQcQsQiQfQlQd">; -def QABS : SInst<"vqabs", "dd", "csilQcQsQiQl">; -def NEG : SOpInst<"vneg", "dd", "csilfdQcQsQiQfQdQl", OP_NEG>; -def QNEG : SInst<"vqneg", "dd", "csilQcQsQiQl">; +def ABS : SInst<"vabs", "dd", "dQdlQl">; +def QABS : SInst<"vqabs", "dd", "lQl">; +def NEG : SOpInst<"vneg", "dd", "dlQdQl", OP_NEG>; +def QNEG : SInst<"vqneg", "dd", "lQl">; //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Accumulated of Unsigned Value @@ -680,9 +651,8 @@ def USQADD : SInst<"vsqadd", "ddd", "UcUsUiUlQUcQUsQUiQUl">; //////////////////////////////////////////////////////////////////////////////// // Reciprocal/Sqrt -// With additional d, Qd type. -def FRECPS : IInst<"vrecps", "ddd", "fdQfQd">; -def FRSQRTS : IInst<"vrsqrts", "ddd", "fdQfQd">; +def FRECPS : IInst<"vrecps", "ddd", "dQd">; +def FRSQRTS : IInst<"vrsqrts", "ddd", "dQd">; //////////////////////////////////////////////////////////////////////////////// // bitwise reverse @@ -736,31 +706,22 @@ def FCVTAS_S32 : SInst<"vcvta_s32", "xd", "fQf">; def FCVTAS_S64 : SInst<"vcvta_s64", "xd", "dQd">; def FCVTAU_S32 : SInst<"vcvta_u32", "ud", "fQf">; def FCVTAU_S64 : SInst<"vcvta_u64", "ud", "dQd">; -def FRECPE : SInst<"vrecpe", "dd", "fdUiQfQUiQd">; -def FRSQRTE : SInst<"vrsqrte", "dd", "fdUiQfQUiQd">; +def FRECPE : SInst<"vrecpe", "dd", "dQd">; +def FRSQRTE : SInst<"vrsqrte", "dd", "dQd">; def FSQRT : SInst<"vsqrt", "dd", "fdQfQd">; //////////////////////////////////////////////////////////////////////////////// // Comparison -// With additional Qd, Ql, QPl type. -def FCAGE : IInst<"vcage", "udd", "fdQfQd">; -def FCAGT : IInst<"vcagt", "udd", "fdQfQd">; -def FCALE : IInst<"vcale", "udd", "fdQfQd">; -def FCALT : IInst<"vcalt", "udd", "fdQfQd">; -// With additional Ql, QUl, Qd types. -def CMTST : WInst<"vtst", "udd", - "csiUcUsUiPcPsQcQsQiQUcQUsQUiQPcQPslUlQlQUlPlQPl">; -// With additional l, Ul,d, Qd, Ql, QUl, Qd types. -def CFMEQ : SOpInst<"vceq", "udd", - "csilfUcUsUiUlPcQcdQdQsQiQfQUcQUsQUiQUlQlQPcPlQPl", OP_EQ>; -def CFMGE : SOpInst<"vcge", "udd", - "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUldQd", OP_GE>; -def CFMLE : SOpInst<"vcle", "udd", - "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUldQd", OP_LE>; -def CFMGT : SOpInst<"vcgt", "udd", - "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUldQd", OP_GT>; -def CFMLT : SOpInst<"vclt", "udd", - "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUldQd", OP_LT>; +def FCAGE : IInst<"vcage", "udd", "dQd">; +def FCAGT : IInst<"vcagt", "udd", "dQd">; +def FCALE : IInst<"vcale", "udd", "dQd">; +def FCALT : IInst<"vcalt", "udd", "dQd">; +def CMTST : WInst<"vtst", "udd", "lUlPlQlQUlQPl">; +def CFMEQ : SOpInst<"vceq", "udd", "lUldQdQlQUlPlQPl", OP_EQ>; +def CFMGE : SOpInst<"vcge", "udd", "lUldQdQlQUl", OP_GE>; +def CFMLE : SOpInst<"vcle", "udd", "lUldQdQlQUl", OP_LE>; +def CFMGT : SOpInst<"vcgt", "udd", "lUldQdQlQUl", OP_GT>; +def CFMLT : SOpInst<"vclt", "udd", "lUldQdQlQUl", OP_LT>; def CMEQ : SInst<"vceqz", "ud", "csilfUcUsUiUlPcPsPlQcQsQiQlQfQUcQUsQUiQUlQPcQPsdQdQPl">; @@ -771,9 +732,8 @@ def CMLT : SInst<"vcltz", "ud", "csilfdQcQsQiQlQfQd">; //////////////////////////////////////////////////////////////////////////////// // Max/Min Integer -// With additional Qd type. -def MAX : SInst<"vmax", "ddd", "csiUcUsUifdQcQsQiQUcQUsQUiQfQd">; -def MIN : SInst<"vmin", "ddd", "csiUcUsUifdQcQsQiQUcQUsQUiQfQd">; +def MAX : SInst<"vmax", "ddd", "dQd">; +def MIN : SInst<"vmin", "ddd", "dQd">; //////////////////////////////////////////////////////////////////////////////// // MaxNum/MinNum Floating Point @@ -782,9 +742,8 @@ def FMINNM : SInst<"vminnm", "ddd", "fdQfQd">; //////////////////////////////////////////////////////////////////////////////// // Pairwise Max/Min -// With additional Qc Qs Qi QUc QUs QUi Qf Qd types. -def MAXP : SInst<"vpmax", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">; -def MINP : SInst<"vpmin", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">; +def MAXP : SInst<"vpmax", "ddd", "QcQsQiQUcQUsQUiQfQd">; +def MINP : SInst<"vpmin", "ddd", "QcQsQiQUcQUsQUiQfQd">; //////////////////////////////////////////////////////////////////////////////// // Pairwise MaxNum/MinNum Floating Point @@ -793,8 +752,7 @@ def FMINNMP : SInst<"vpminnm", "ddd", "fQfQd">; //////////////////////////////////////////////////////////////////////////////// // Pairwise Addition -// With additional Qc Qs Qi QUc QUs QUi Qf Qd types. -def ADDP : IInst<"vpadd", "ddd", "csiUcUsUifQcQsQiQlQUcQUsQUiQUlQfQd">; +def ADDP : IInst<"vpadd", "ddd", "QcQsQiQlQUcQUsQUiQUlQfQd">; //////////////////////////////////////////////////////////////////////////////// // Shifts by constant @@ -804,11 +762,8 @@ def SHLL_HIGH_N : SOpInst<"vshll_high_n", "ndi", "HcHsHiHUcHUsHUi", OP_LONG_HI>; //////////////////////////////////////////////////////////////////////////////// -// Shifts with insert, with additional Ql, QPl type. -def SRI_N : WInst<"vsri_n", "dddi", - "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPsPlQPl">; -def SLI_N : WInst<"vsli_n", "dddi", - "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPsPlQPl">; +def SRI_N : WInst<"vsri_n", "dddi", "PlQPl">; +def SLI_N : WInst<"vsli_n", "dddi", "PlQPl">; // Right shift narrow high def SHRN_HIGH_N : IOpInst<"vshrn_high_n", "hmdi", @@ -869,12 +824,10 @@ def VMULL_HIGH_P64 : SOpInst<"vmull_high", "rdd", "HPl", OP_MULLHi_P64>; //////////////////////////////////////////////////////////////////////////////// // Extract or insert element from vector -def GET_LANE : IInst<"vget_lane", "sdi", - "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQdPlQPl">; -def SET_LANE : IInst<"vset_lane", "dsdi", - "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQdPlQPl">; +def GET_LANE : IInst<"vget_lane", "sdi", "dQdPlQPl">; +def SET_LANE : IInst<"vset_lane", "dsdi", "dQdPlQPl">; def COPY_LANE : IOpInst<"vcopy_lane", "ddidi", - "csilPcPsUcUsUiUlPcPsPlfd", OP_COPY_LN>; + "csilUcUsUiUlPcPsPlfd", OP_COPY_LN>; def COPYQ_LANE : IOpInst<"vcopy_lane", "ddigi", "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPYQ_LN>; def COPY_LANEQ : IOpInst<"vcopy_laneq", "ddiki", @@ -884,26 +837,19 @@ def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "ddidi", //////////////////////////////////////////////////////////////////////////////// // Set all lanes to same value -def VDUP_LANE1: WOpInst<"vdup_lane", "dgi", - "csilPcPsUcUsUiUlhfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl", - OP_DUP_LN>; +def VDUP_LANE1: WOpInst<"vdup_lane", "dgi", "hdQhQdPlQPl", OP_DUP_LN>; def VDUP_LANE2: WOpInst<"vdup_laneq", "dki", - "csilPcPsUcUsUiUlhfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl", + "csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl", OP_DUP_LN>; -def DUP_N : WOpInst<"vdup_n", "ds", - "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUldQdPlQPl", - OP_DUP>; -def MOV_N : WOpInst<"vmov_n", "ds", - "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUldQd", - OP_DUP>; +def DUP_N : WOpInst<"vdup_n", "ds", "dQdPlQPl", OP_DUP>; +def MOV_N : WOpInst<"vmov_n", "ds", "dQd", OP_DUP>; //////////////////////////////////////////////////////////////////////////////// -// Combining vectors, with additional Pl -def COMBINE : NoTestOpInst<"vcombine", "kdd", "csilhfdUcUsUiUlPcPsPl", OP_CONC>; +def COMBINE : NoTestOpInst<"vcombine", "kdd", "dPl", OP_CONC>; //////////////////////////////////////////////////////////////////////////////// -//Initialize a vector from bit pattern, with additional Pl -def CREATE : NoTestOpInst<"vcreate", "dl", "csihfdUcUsUiUlPcPslPl", OP_CAST>; +//Initialize a vector from bit pattern +def CREATE : NoTestOpInst<"vcreate", "dl", "dPl", OP_CAST>; //////////////////////////////////////////////////////////////////////////////// @@ -947,7 +893,7 @@ def VMUL_LANE_A64 : IOpInst<"vmul_lane", "ddgi", "Qd", OP_MUL_LN>; // Note: d type is handled by SCALAR_VMUL_LANEQ def VMUL_LANEQ : IOpInst<"vmul_laneq", "ddji", - "sifUsUiQsQiQfQUsQUiQfQd", OP_MUL_LN>; + "sifUsUiQsQiQUsQUiQfQd", OP_MUL_LN>; def VMULL_LANEQ : SOpInst<"vmull_laneq", "wdki", "siUsUi", OP_MULL_LN>; def VMULL_HIGH_LANE : SOpInst<"vmull_high_lane", "wkdi", "siUsUi", OP_MULLHi_LN>; @@ -979,12 +925,11 @@ def FMINNMV : SInst<"vminnmv", "sd", "fQfQd">; //////////////////////////////////////////////////////////////////////////////// // Newly added Vector Extract for f64 -def VEXT_A64 : WInst<"vext", "dddi", - "cUcPcsUsPsiUilUlfdQcQUcQPcQsQUsQPsQiQUiQlQUlQfQdPlQPl">; +def VEXT_A64 : WInst<"vext", "dddi", "dQdPlQPl">; //////////////////////////////////////////////////////////////////////////////// // Crypto -let isCrypto = 1 in { +let ArchGuard = "__ARM_FEATURE_CRYPTO" in { def AESE : SInst<"vaese", "ddd", "QUc">; def AESD : SInst<"vaesd", "ddd", "QUc">; def AESMC : SInst<"vaesmc", "dd", "QUc">; @@ -1035,10 +980,17 @@ def VQTBX4_A64 : WInst<"vqtbx4", "ddDt", "UccPcQUcQcQPc">; //////////////////////////////////////////////////////////////////////////////// // Vector reinterpret cast operations -// With additional d, Qd, pl, Qpl types -def REINTERPRET + +// NeonEmitter implicitly takes the cartesian product of the type string with +// itself during generation so, unlike all other intrinsics, this one should +// include *all* types, not just additional ones. +// +// We also rely on NeonEmitter handling the 32-bit vreinterpret before the +// 64-bit one so that the common casts don't get guarded as AArch64-only +// (FIXME). +def VVREINTERPRET : NoTestOpInst<"vreinterpret", "dd", - "csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", OP_REINT>; + "csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", OP_REINT>; //////////////////////////////////////////////////////////////////////////////// @@ -1056,10 +1008,8 @@ def SCALAR_SUB : SInst<"vsub", "sss", "SlSUl">; def SCALAR_QSUB : SInst<"vqsub", "sss", "ScSsSiSlSUcSUsSUiSUl">; let InstName = "vmov" in { -def VGET_HIGH_A64 : NoTestOpInst<"vget_high", "dk", "csilhfdUcUsUiUlPcPsPl", - OP_HI>; -def VGET_LOW_A64 : NoTestOpInst<"vget_low", "dk", "csilhfdUcUsUiUlPcPsPl", - OP_LO>; +def VGET_HIGH_A64 : NoTestOpInst<"vget_high", "dk", "dPl", OP_HI>; +def VGET_LOW_A64 : NoTestOpInst<"vget_low", "dk", "dPl", OP_LO>; } //////////////////////////////////////////////////////////////////////////////// diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index 5b0084afdb8a..b33c89d8b206 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -372,13 +372,15 @@ public: void runTests(raw_ostream &o); private: + void emitGuardedIntrinsic(raw_ostream &OS, Record *R, + std::string &CurrentGuard, bool &InGuard, + StringMap<ClassKind> &EmittedMap); void emitIntrinsic(raw_ostream &OS, Record *R, StringMap<ClassKind> &EmittedMap); void genBuiltinsDef(raw_ostream &OS); void genOverloadTypeCheckCode(raw_ostream &OS); void genIntrinsicRangeCheckCode(raw_ostream &OS); - void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap, - bool isA64TestGen); + void genTargetTest(raw_ostream &OS); }; } // end anonymous namespace @@ -2731,80 +2733,59 @@ void NeonEmitter::run(raw_ostream &OS) { std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst"); StringMap<ClassKind> EmittedMap; + std::string CurrentGuard = ""; + bool InGuard = false; - // Emit vmovl, vmull and vabd intrinsics first so they can be used by other - // intrinsics. (Some of the saturating multiply instructions are also - // used to implement the corresponding "_lane" variants, but tablegen - // sorts the records into alphabetical order so that the "_lane" variants - // come after the intrinsics they use.) - emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap); - emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap); - emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap); - emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap); + // Some intrinsics are used to express others. These need to be emitted near + // the beginning so that the declarations are present when needed. This is + // rather an ugly, arbitrary list, but probably simpler than actually tracking + // dependency info. + static const char *EarlyDefsArr[] = + { "VFMA", "VQMOVN", "VQMOVUN", "VABD", "VMOVL", + "VABDL", "VGET_HIGH", "VCOMBINE", "VSHLL_N", "VMOVL_HIGH", + "VMULL", "VMLAL_N", "VMLSL_N", "VMULL_N", "VMULL_P64", + "VQDMLAL_N", "VQDMLSL_N", "VQDMULL_N" }; + ArrayRef<const char *> EarlyDefs(EarlyDefsArr); - // ARM intrinsics must be emitted before AArch64 intrinsics to ensure - // common intrinsics appear only once in the output stream. - // The check for uniquiness is done in emitIntrinsic. - // Emit ARM intrinsics. - for (unsigned i = 0, e = RV.size(); i != e; ++i) { - Record *R = RV[i]; - - // Skip AArch64 intrinsics; they will be emitted at the end. - bool isA64 = R->getValueAsBit("isA64"); - if (isA64) - continue; - - if (R->getName() != "VMOVL" && R->getName() != "VMULL" && - R->getName() != "VABD") - emitIntrinsic(OS, R, EmittedMap); + for (unsigned i = 0; i < EarlyDefs.size(); ++i) { + Record *R = Records.getDef(EarlyDefs[i]); + emitGuardedIntrinsic(OS, R, CurrentGuard, InGuard, EmittedMap); } - // Emit AArch64-specific intrinsics. - OS << "#ifdef __aarch64__\n"; - - emitIntrinsic(OS, Records.getDef("VMULL_P64"), EmittedMap); - emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap); - emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap); - emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap); - for (unsigned i = 0, e = RV.size(); i != e; ++i) { Record *R = RV[i]; - - // Skip ARM intrinsics already included above. - bool isA64 = R->getValueAsBit("isA64"); - if (!isA64) + if (std::find(EarlyDefs.begin(), EarlyDefs.end(), R->getName()) != + EarlyDefs.end()) continue; - // Skip crypto temporarily, and will emit them all together at the end. - bool isCrypto = R->getValueAsBit("isCrypto"); - if (isCrypto) - continue; - - emitIntrinsic(OS, R, EmittedMap); + emitGuardedIntrinsic(OS, R, CurrentGuard, InGuard, EmittedMap); } - OS << "#endif\n\n"; - - // Now emit all the crypto intrinsics together - OS << "#ifdef __ARM_FEATURE_CRYPTO\n"; - - for (unsigned i = 0, e = RV.size(); i != e; ++i) { - Record *R = RV[i]; - - bool isCrypto = R->getValueAsBit("isCrypto"); - if (!isCrypto) - continue; - - emitIntrinsic(OS, R, EmittedMap); - } - - - OS << "#endif\n\n"; + if (InGuard) + OS << "#endif\n\n"; OS << "#undef __ai\n\n"; OS << "#endif /* __ARM_NEON_H */\n"; } +void NeonEmitter::emitGuardedIntrinsic(raw_ostream &OS, Record *R, + std::string &CurrentGuard, bool &InGuard, + StringMap<ClassKind> &EmittedMap) { + + std::string NewGuard = R->getValueAsString("ArchGuard"); + if (NewGuard != CurrentGuard) { + if (InGuard) + OS << "#endif\n\n"; + if (NewGuard.size()) + OS << "#if " << NewGuard << '\n'; + + CurrentGuard = NewGuard; + InGuard = NewGuard.size() != 0; + } + + emitIntrinsic(OS, R, EmittedMap); +} + /// emitIntrinsic - Write out the arm_neon.h header file definitions for the /// intrinsics specified by record R checking for intrinsic uniqueness. void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R, @@ -2845,8 +2826,11 @@ void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R, } else { std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind); - if (EmittedMap.count(s)) + if (EmittedMap.count(s)) { + errs() << "warning: duplicate definition: " << name + << " (type: " << TypeString('d', TypeVec[ti]) << ")\n"; continue; + } EmittedMap[s] = classKind; OS << s; } @@ -3329,10 +3313,10 @@ static std::string GenTest(const std::string &name, /// Write out all intrinsic tests for the specified target, checking /// for intrinsic test uniqueness. -void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap, - bool isA64GenTest) { - if (isA64GenTest) - OS << "#ifdef __aarch64__\n"; +void NeonEmitter::genTargetTest(raw_ostream &OS) { + StringMap<OpKind> EmittedMap; + std::string CurrentGuard = ""; + bool InGuard = false; std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); for (unsigned i = 0, e = RV.size(); i != e; ++i) { @@ -3343,12 +3327,17 @@ void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap, bool isShift = R->getValueAsBit("isShift"); std::string InstName = R->getValueAsString("InstName"); bool isHiddenLOp = R->getValueAsBit("isHiddenLInst"); - bool isA64 = R->getValueAsBit("isA64"); - // do not include AArch64 intrinsic test if not generating - // code for AArch64 - if (!isA64GenTest && isA64) - continue; + std::string NewGuard = R->getValueAsString("ArchGuard"); + if (NewGuard != CurrentGuard) { + if (InGuard) + OS << "#endif\n\n"; + if (NewGuard.size()) + OS << "#if " << NewGuard << '\n'; + + CurrentGuard = NewGuard; + InGuard = NewGuard.size() != 0; + } SmallVector<StringRef, 16> TypeVec; ParseTypes(R, Types, TypeVec); @@ -3370,8 +3359,8 @@ void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap, continue; std::string testFuncProto; std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], - isShift, isHiddenLOp, ck, InstName, isA64, - testFuncProto); + isShift, isHiddenLOp, ck, InstName, + CurrentGuard.size(), testFuncProto); if (EmittedMap.count(testFuncProto)) continue; EmittedMap[testFuncProto] = kind; @@ -3379,17 +3368,15 @@ void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap, } } else { std::string testFuncProto; - std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift, - isHiddenLOp, ck, InstName, isA64, testFuncProto); - if (EmittedMap.count(testFuncProto)) - continue; - EmittedMap[testFuncProto] = kind; + std::string s = + GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift, isHiddenLOp, + ck, InstName, CurrentGuard.size(), testFuncProto); OS << s << "\n"; } } } - if (isA64GenTest) + if (InGuard) OS << "#endif\n"; } /// runTests - Write out a complete set of tests for all of the Neon @@ -3409,15 +3396,7 @@ void NeonEmitter::runTests(raw_ostream &OS) { "#include <arm_neon.h>\n" "\n"; - // ARM tests must be emitted before AArch64 tests to ensure - // tests for intrinsics that are common to ARM and AArch64 - // appear only once in the output stream. - // The check for uniqueness is done in genTargetTest. - StringMap<OpKind> EmittedMap; - - genTargetTest(OS, EmittedMap, false); - - genTargetTest(OS, EmittedMap, true); + genTargetTest(OS); } namespace clang {