forked from OSchip/llvm-project
[ARM] Add MVE insert-of-extract pattern
A v4i32 insert of an extract can become a simple lane move, as opposed to round-tripping via a GPR. This adds a pattern that turns a v4i32 insert-extract pair into an EXTRACT_SUBREG/INSERT_SUBREG, with the required COPY_TO_REGCLASS. These then get optimized into a simple lane move by the rest of the backend. Differential Revision: https://reviews.llvm.org/D95428
This commit is contained in:
parent
903a153409
commit
3a5adf8483
|
@ -1851,6 +1851,14 @@ let Predicates = [HasMVEInt] in {
|
|||
(i32 (EXTRACT_SUBREG MQPR:$src, (SSubReg_f32_reg imm:$lane))), rGPR)>;
|
||||
def : Pat<(insertelt (v4i32 MQPR:$src1), rGPR:$src2, imm:$lane),
|
||||
(MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$src2, imm:$lane)>;
|
||||
// Copy a 32-bit lane from one vector to another via the S sub-registers, without going via GPR regs
|
||||
def : Pat<(insertelt (v4i32 MQPR:$src1), (extractelt (v4i32 MQPR:$src2), imm:$extlane), imm:$inslane),
|
||||
(v4i32 (COPY_TO_REGCLASS
|
||||
(INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4i32 MQPR:$src1), MQPR)),
|
||||
(f32 (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4i32 MQPR:$src2), MQPR)),
|
||||
(SSubReg_f32_reg imm:$extlane))),
|
||||
(SSubReg_f32_reg imm:$inslane)),
|
||||
MQPR))>;
|
||||
|
||||
def : Pat<(vector_insert (v16i8 MQPR:$src1), rGPR:$src2, imm:$lane),
|
||||
(MVE_VMOV_to_lane_8 MQPR:$src1, rGPR:$src2, imm:$lane)>;
|
||||
|
|
|
@ -55,7 +55,7 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_sext(<2 x i32> %x) {
|
|||
; CHECK-LABEL: add_v2i32_v2i64_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r0
|
||||
; CHECK-NEXT: vmov q1, q0
|
||||
; CHECK-NEXT: vmov r2, s2
|
||||
; CHECK-NEXT: asrs r1, r0, #31
|
||||
; CHECK-NEXT: vmov.32 q1[1], r1
|
||||
|
@ -889,7 +889,7 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_sext(<2 x i32> %x, i64 %a) {
|
|||
; CHECK-LABEL: add_v2i32_v2i64_acc_sext:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r2, s0
|
||||
; CHECK-NEXT: vmov.32 q1[0], r2
|
||||
; CHECK-NEXT: vmov q1, q0
|
||||
; CHECK-NEXT: vmov r3, s2
|
||||
; CHECK-NEXT: asrs r2, r2, #31
|
||||
; CHECK-NEXT: vmov.32 q1[1], r2
|
||||
|
|
Loading…
Reference in New Issue