diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index d4a03c0dfd4f..c6e765e57812 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -1175,135 +1175,213 @@ def VST1LNq32Pseudo_UPD : VSTQLNWBPseudo; // VST2LN : Vector Store (single 2-element structure from one lane) class VST2LN op11_8, bits<4> op7_4, string Dt> - : NLdSt<1, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VST2ln, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr", - "", []>; + : NLdStLn<1, 0b00, op11_8, op7_4, (outs), + (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane), + IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn", + "", []> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; +} -def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8">; -def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16">; -def VST2LNd32 : VST2LN<0b1001, {?,0,?,?}, "32">; +def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> { + let Inst{7} = lane{0}; +} def VST2LNd8Pseudo : VSTQLNPseudo; def VST2LNd16Pseudo : VSTQLNPseudo; def VST2LNd32Pseudo : VSTQLNPseudo; // ...with double-spaced registers: -def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16">; -def VST2LNq32 : VST2LN<0b1001, {?,1,?,?}, "32">; +def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> { + let Inst{7-6} = lane{1-0}; + let Inst{4} = Rn{4}; +} +def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{4} = Rn{4}; +} def VST2LNq16Pseudo : VSTQQLNPseudo; def VST2LNq32Pseudo : VSTQQLNPseudo; // ...with address register writeback: class VST2LNWB op11_8, bits<4> op7_4, string Dt> - : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb), + : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset", - "$addr.addr = $wb", []>; + "$addr.addr = $wb", []> { + let Inst{4} = Rn{4}; +} -def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8">; -def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16">; -def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,?,?}, "32">; +def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> { + let Inst{7} = lane{0}; +} def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo; def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo; def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo; -def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16">; -def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,?,?}, "32">; +def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> { + let Inst{7} = lane{0}; +} def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo; def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo; // VST3LN : Vector Store (single 3-element structure from one lane) class VST3LN op11_8, bits<4> op7_4, string Dt> - : NLdSt<1, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, + : NLdStLn<1, 0b00, op11_8, op7_4, (outs), + (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VST3ln, "vst3", Dt, - "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>; + "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> { + let Rm = 0b1111; +} -def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8">; -def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16">; -def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32">; +def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> { + let Inst{7} = lane{0}; +} def VST3LNd8Pseudo : VSTQQLNPseudo; def VST3LNd16Pseudo : VSTQQLNPseudo; def VST3LNd32Pseudo : VSTQQLNPseudo; // ...with double-spaced registers: -def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16">; -def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32">; +def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> { + let Inst{7} = lane{0}; +} def VST3LNq16Pseudo : VSTQQQQLNPseudo; def VST3LNq32Pseudo : VSTQQQQLNPseudo; // ...with address register writeback: class VST3LNWB op11_8, bits<4> op7_4, string Dt> - : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), + : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VST3lnu, "vst3", Dt, - "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset", - "$addr.addr = $wb", []>; + "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm", + "$Rn.addr = $wb", []>; -def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8">; -def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16">; -def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32">; +def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> { + let Inst{7} = lane{0}; +} def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo; def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo; def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo; -def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16">; -def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32">; +def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> { + let Inst{7} = lane{0}; +} def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo; def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo; // VST4LN : Vector Store (single 4-element structure from one lane) class VST4LN op11_8, bits<4> op7_4, string Dt> - : NLdSt<1, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, + : NLdStLn<1, 0b00, op11_8, op7_4, (outs), + (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), IIC_VST4ln, "vst4", Dt, - "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr", - "", []>; + "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn", + "", []> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; +} -def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8">; -def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16">; -def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32">; +def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{5}; +} def VST4LNd8Pseudo : VSTQQLNPseudo; def VST4LNd16Pseudo : VSTQQLNPseudo; def VST4LNd32Pseudo : VSTQQLNPseudo; // ...with double-spaced registers: -def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16">; -def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32">; +def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{5}; +} def VST4LNq16Pseudo : VSTQQQQLNPseudo; def VST4LNq32Pseudo : VSTQQQQLNPseudo; // ...with address register writeback: class VST4LNWB op11_8, bits<4> op7_4, string Dt> - : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), + : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), IIC_VST4lnu, "vst4", Dt, - "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset", - "$addr.addr = $wb", []>; + "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; +} -def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8">; -def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16">; -def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">; +def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{5}; +} def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo; def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo; def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo; -def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">; -def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">; +def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{5}; +} def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo; def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo; diff --git a/llvm/test/MC/ARM/neon-vst-encoding.s b/llvm/test/MC/ARM/neon-vst-encoding.s index 2fc5fae09ad2..c595aa2d5a4d 100644 --- a/llvm/test/MC/ARM/neon-vst-encoding.s +++ b/llvm/test/MC/ARM/neon-vst-encoding.s @@ -66,3 +66,36 @@ vst4.32 {d16, d18, d20, d22}, [r0]! @ CHECK: vst4.32 {d17, d19, d21, d23}, [r0]! @ encoding: [0x8d,0x11,0x40,0xf4] vst4.32 {d17, d19, d21, d23}, [r0]! + +@ CHECK: vst2.8 {d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xc0,0xf4] + vst2.8 {d16[1], d17[1]}, [r0, :16] +@ CHECK: vst2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xc0,0xf4] + vst2.16 {d16[1], d17[1]}, [r0, :32] +@ CHECK: vst2.32 {d16[1], d17[1]}, [r0] @ encoding: [0x8f,0x09,0xc0,0xf4] + vst2.32 {d16[1], d17[1]}, [r0] +@ CHECK: vst2.16 {d17[1], d19[1]}, [r0] @ encoding: [0x6f,0x15,0xc0,0xf4] + vst2.16 {d17[1], d19[1]}, [r0] +@ CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xc0,0xf4] + vst2.32 {d17[0], d19[0]}, [r0, :64] + +@ CHECK: vst3.8 {d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x2f,0x02,0xc0,0xf4] + vst3.8 {d16[1], d17[1], d18[1]}, [r0] +@ CHECK: vst3.16 {d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x4f,0x06,0xc0,0xf4] + vst3.16 {d16[1], d17[1], d18[1]}, [r0] +@ CHECK: vst3.32 {d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x8f,0x0a,0xc0,0xf4] + vst3.32 {d16[1], d17[1], d18[1]}, [r0] +@ CHECK: vst3.16 {d17[2], d19[2], d21[2]}, [r0] @ encoding: [0xaf,0x16,0xc0,0xf4] + vst3.16 {d17[2], d19[2], d21[2]}, [r0] +@ CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r0] @ encoding: [0x4f,0x0a,0xc0,0xf4] + vst3.32 {d16[0], d18[0], d20[0]}, [r0] + +@ CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ encoding: [0x3f,0x03,0xc0,0xf4] + vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32] +@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xc0,0xf4] + vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0] +@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128] @ encoding: [0xaf,0x0b,0xc0,0xf4] + vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128] +@ CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64] @ encoding: [0xff,0x17,0xc0,0xf4] + vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64] +@ CHECK: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xc0,0xf4] + vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]