forked from OSchip/llvm-project
[ARM,MVE] Use VMOV.{S8,S16} for sign-extended extractelement.
MVE includes instructions that extract an 8- or 16-bit lane from a vector and sign-extend it into the output 32-bit GPR. `ARMInstrMVE.td` already included isel patterns to select those instructions in response to the `ARMISD::VGETLANEs` selection-DAG node type. But `ARMISD::VGETLANEs` was never actually generated, because the code that creates it was conditioned on NEON only. It's an easy fix to enable the same code for integer MVE, and now IR that sign-extends the result of an extractelement (whether explicitly or as part of the function call ABI) will use `vmov.s8` instead of `vmov.u8` followed by `sxtb`.

Reviewers: SjoerdMeijer, dmgreen, ostannard
Subscribers: kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70132
This commit is contained in:
parent
1d55c9e59e
commit
5b9e4daef0
|
@ -13843,11 +13843,12 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
|
||||||
const ARMSubtarget *ST) {
|
const ARMSubtarget *ST) {
|
||||||
SDValue N0 = N->getOperand(0);
|
SDValue N0 = N->getOperand(0);
|
||||||
|
|
||||||
// Check for sign- and zero-extensions of vector extract operations of 8-
|
// Check for sign- and zero-extensions of vector extract operations of 8- and
|
||||||
// and 16-bit vector elements. NEON supports these directly. They are
|
// 16-bit vector elements. NEON and MVE support these directly. They are
|
||||||
// handled during DAG combining because type legalization will promote them
|
// handled during DAG combining because type legalization will promote them
|
||||||
// to 32-bit types and it is messy to recognize the operations after that.
|
// to 32-bit types and it is messy to recognize the operations after that.
|
||||||
if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
|
if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
|
||||||
|
N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
|
||||||
SDValue Vec = N0.getOperand(0);
|
SDValue Vec = N0.getOperand(0);
|
||||||
SDValue Lane = N0.getOperand(1);
|
SDValue Lane = N0.getOperand(1);
|
||||||
EVT VT = N->getValueType(0);
|
EVT VT = N->getValueType(0);
|
||||||
|
|
|
@ -0,0 +1,86 @@
|
||||||
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
|
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc i32 @u8_explicit_extend(<16 x i8> %a) {
|
||||||
|
; CHECK-LABEL: u8_explicit_extend:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: vmov.u8 r0, q0[10]
|
||||||
|
; CHECK-NEXT: bx lr
|
||||||
|
entry:
|
||||||
|
%0 = extractelement <16 x i8> %a, i32 10
|
||||||
|
%1 = zext i8 %0 to i32
|
||||||
|
ret i32 %1
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc i32 @s8_explicit_extend(<16 x i8> %a) {
|
||||||
|
; CHECK-LABEL: s8_explicit_extend:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: vmov.s8 r0, q0[10]
|
||||||
|
; CHECK-NEXT: bx lr
|
||||||
|
entry:
|
||||||
|
%0 = extractelement <16 x i8> %a, i32 10
|
||||||
|
%1 = sext i8 %0 to i32
|
||||||
|
ret i32 %1
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc i8 @u8_extend_via_pcs(<16 x i8> %a) {
|
||||||
|
; CHECK-LABEL: u8_extend_via_pcs:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: vmov.u8 r0, q0[10]
|
||||||
|
; CHECK-NEXT: bx lr
|
||||||
|
entry:
|
||||||
|
%0 = extractelement <16 x i8> %a, i32 10
|
||||||
|
ret i8 %0
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc signext i8 @s8_extend_via_pcs(<16 x i8> %a) {
|
||||||
|
; CHECK-LABEL: s8_extend_via_pcs:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: vmov.s8 r0, q0[10]
|
||||||
|
; CHECK-NEXT: bx lr
|
||||||
|
entry:
|
||||||
|
%0 = extractelement <16 x i8> %a, i32 10
|
||||||
|
ret i8 %0
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc i32 @u16_explicit_extend(<8 x i16> %a) {
|
||||||
|
; CHECK-LABEL: u16_explicit_extend:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||||
|
; CHECK-NEXT: bx lr
|
||||||
|
entry:
|
||||||
|
%0 = extractelement <8 x i16> %a, i32 5
|
||||||
|
%1 = zext i16 %0 to i32
|
||||||
|
ret i32 %1
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc i32 @s16_explicit_extend(<8 x i16> %a) {
|
||||||
|
; CHECK-LABEL: s16_explicit_extend:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: vmov.s16 r0, q0[5]
|
||||||
|
; CHECK-NEXT: bx lr
|
||||||
|
entry:
|
||||||
|
%0 = extractelement <8 x i16> %a, i32 5
|
||||||
|
%1 = sext i16 %0 to i32
|
||||||
|
ret i32 %1
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc i16 @u16_extend_via_pcs(<8 x i16> %a) {
|
||||||
|
; CHECK-LABEL: u16_extend_via_pcs:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: vmov.u16 r0, q0[5]
|
||||||
|
; CHECK-NEXT: bx lr
|
||||||
|
entry:
|
||||||
|
%0 = extractelement <8 x i16> %a, i32 5
|
||||||
|
ret i16 %0
|
||||||
|
}
|
||||||
|
|
||||||
|
define arm_aapcs_vfpcc signext i16 @s16_extend_via_pcs(<8 x i16> %a) {
|
||||||
|
; CHECK-LABEL: s16_extend_via_pcs:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: vmov.s16 r0, q0[5]
|
||||||
|
; CHECK-NEXT: bx lr
|
||||||
|
entry:
|
||||||
|
%0 = extractelement <8 x i16> %a, i32 5
|
||||||
|
ret i16 %0
|
||||||
|
}
|
Loading…
Reference in New Issue