From f7294ac8093a2fbd8c00254580eaac6c4e1f7b24 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan@cn.ibm.com>
Date: Fri, 7 May 2021 11:04:47 +0800
Subject: [PATCH] [PowerPC] Remove extra swap for extract+vperm on LE

This is a simple fix for little-endian (LE) targets only. On big-endian
(BE) targets, vector shuffles are categorized into different ops, so more
work may be needed in TableGen/pre-isel to eliminate the extra swaps there.

Reviewed By: nemanjai

Differential Revision: https://reviews.llvm.org/D101605
---
 llvm/lib/Target/PowerPC/PPCInstrVSX.td      | 3 +++
 llvm/test/CodeGen/PowerPC/vec_extract_p9.ll | 7 +++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 89c52fd0aaee..95cf5ba95b13 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2938,6 +2938,9 @@ defm : ScalToVecWPermute<v2f64, (f64 f64:$A),
                                    (SUBREG_TO_REG (i64 1), $A, sub_64), 0),
                          (SUBREG_TO_REG (i64 1), $A, sub_64)>;
 
+def : Pat<(f64 (extractelt (v2f64 (bitconvert (v16i8
+                 (PPCvperm v16i8:$A, v16i8:$B, v16i8:$C)))), 0)),
+          (f64 (EXTRACT_SUBREG (VPERM $B, $A, $C), sub_64))>; // $A/$B swapped
 def : Pat<(f64 (extractelt v2f64:$S, 0)),
           (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
 def : Pat<(f64 (extractelt v2f64:$S, 1)),
diff --git a/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll b/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll
index 1ce1d4175398..8f3967403ae6 100644
--- a/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll
@@ -182,10 +182,9 @@ define double @test10(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LE-NEXT:    addi 3, 3, .LCPI9_0@toc@l
 ; CHECK-LE-NEXT:    lxvx 36, 0, 3
 ; CHECK-LE-NEXT:    addis 3, 2, .LCPI9_1@toc@ha
-; CHECK-LE-NEXT:    lfs 1, .LCPI9_1@toc@l(3)
-; CHECK-LE-NEXT:    vperm 2, 2, 3, 4
-; CHECK-LE-NEXT:    xxswapd 0, 34
-; CHECK-LE-NEXT:    xsadddp 1, 0, 1
+; CHECK-LE-NEXT:    lfs 0, .LCPI9_1@toc@l(3)
+; CHECK-LE-NEXT:    vperm 2, 3, 2, 4
+; CHECK-LE-NEXT:    xsadddp 1, 34, 0
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test10: