diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 9f3389cb7908..c395a4feea70 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -13843,11 +13843,12 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { SDValue N0 = N->getOperand(0); - // Check for sign- and zero-extensions of vector extract operations of 8- - // and 16-bit vector elements. NEON supports these directly. They are + // Check for sign- and zero-extensions of vector extract operations of 8- and + // 16-bit vector elements. NEON and MVE support these directly. They are // handled during DAG combining because type legalization will promote them // to 32-bit types and it is messy to recognize the operations after that. - if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + if ((ST->hasNEON() || ST->hasMVEIntegerOps()) && + N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { SDValue Vec = N0.getOperand(0); SDValue Lane = N0.getOperand(1); EVT VT = N->getValueType(0); diff --git a/llvm/test/CodeGen/Thumb2/mve-extractelt.ll b/llvm/test/CodeGen/Thumb2/mve-extractelt.ll new file mode 100644 index 000000000000..e0f1e9468c5e --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-extractelt.ll @@ -0,0 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s + +define arm_aapcs_vfpcc i32 @u8_explicit_extend(<16 x i8> %a) { +; CHECK-LABEL: u8_explicit_extend: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.u8 r0, q0[10] +; CHECK-NEXT: bx lr +entry: + %0 = extractelement <16 x i8> %a, i32 10 + %1 = zext i8 %0 to i32 + ret i32 %1 +} + +define arm_aapcs_vfpcc i32 @s8_explicit_extend(<16 x i8> %a) { +; CHECK-LABEL: s8_explicit_extend: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.s8 r0, q0[10] +; CHECK-NEXT: bx lr +entry: + %0 = extractelement <16 x i8> %a, i32 10 + %1 = sext i8 %0 to i32 + ret i32 %1 +} + +define arm_aapcs_vfpcc i8 @u8_extend_via_pcs(<16 x i8> %a) { +; CHECK-LABEL: u8_extend_via_pcs: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.u8 r0, q0[10] +; CHECK-NEXT: bx lr +entry: + %0 = extractelement <16 x i8> %a, i32 10 + ret i8 %0 +} + +define arm_aapcs_vfpcc signext i8 @s8_extend_via_pcs(<16 x i8> %a) { +; CHECK-LABEL: s8_extend_via_pcs: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.s8 r0, q0[10] +; CHECK-NEXT: bx lr +entry: + %0 = extractelement <16 x i8> %a, i32 10 + ret i8 %0 +} + +define arm_aapcs_vfpcc i32 @u16_explicit_extend(<8 x i16> %a) { +; CHECK-LABEL: u16_explicit_extend: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: bx lr +entry: + %0 = extractelement <8 x i16> %a, i32 5 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define arm_aapcs_vfpcc i32 @s16_explicit_extend(<8 x i16> %a) { +; CHECK-LABEL: s16_explicit_extend: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.s16 r0, q0[5] +; CHECK-NEXT: bx lr +entry: + %0 = extractelement <8 x i16> %a, i32 5 + %1 = sext i16 %0 to i32 + ret i32 %1 +} + +define arm_aapcs_vfpcc i16 @u16_extend_via_pcs(<8 x i16> %a) { +; CHECK-LABEL: u16_extend_via_pcs: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: bx lr +entry: + %0 = extractelement <8 x i16> %a, i32 5 + ret i16 %0 +} + +define arm_aapcs_vfpcc signext i16 @s16_extend_via_pcs(<8 x i16> %a) { +; CHECK-LABEL: s16_extend_via_pcs: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.s16 r0, q0[5] +; CHECK-NEXT: bx lr +entry: + %0 = extractelement <8 x i16> %a, i32 5 + ret i16 %0 +}