From 0c1000cbd6d25d749c78f9c27fa985a2608ff217 Mon Sep 17 00:00:00 2001 From: Amy Kwan Date: Mon, 9 May 2022 10:51:08 -0500 Subject: [PATCH] [NFC][PowerPC] Add 32-bit AIX RUN lines to test cases. This patch adds 32-bit AIX RUN lines to several test cases, along with the addition of one new test case, to prepare for future codegen changes involving the PPCISD::SCALAR_TO_VECTOR_PERMUTED node on 32-bit mode. --- .../PowerPC/aix_scalar_vector_permuted.ll | 86 ++++ .../PowerPC/canonical-merge-shuffles.ll | 259 ++++++++++ .../CodeGen/PowerPC/float-vector-gather.ll | 21 + .../CodeGen/PowerPC/load-v4i8-improved.ll | 18 +- llvm/test/CodeGen/PowerPC/pre-inc-disable.ll | 19 + .../CodeGen/PowerPC/reduce_scalarization.ll | 166 +++++++ .../CodeGen/PowerPC/scalar_vector_test_4.ll | 292 +++++++++++ llvm/test/CodeGen/PowerPC/vec_insert_elt.ll | 453 ++++++++++++++++++ 8 files changed, 1313 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll diff --git a/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll b/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll new file mode 100644 index 000000000000..71d70167b12c --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll @@ -0,0 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefix=AIX-P8-64 +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefix=AIX-P8-32 +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefix=AIX-P9-64 +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefix=AIX-P9-32 + +%d8 = type <8 x double> +%f1 = type <1 x float> +%f2 = type <2 x float> +%f4 = type <4 x float> +%f8 = type <8 x float> +%i4 = type <4 x i32> + +define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) { +; AIX-P8-64-LABEL: test_f2: +; AIX-P8-64: # %bb.0: +; AIX-P8-64-NEXT: lfdx f0, 0, r3 +; AIX-P8-64-NEXT: lfdx f1, 0, r4 +; AIX-P8-64-NEXT: xvaddsp vs0, vs0, vs1 +; AIX-P8-64-NEXT: stfdx f0, 0, r5 +; AIX-P8-64-NEXT: blr +; +; AIX-P8-32-LABEL: test_f2: +; AIX-P8-32: # %bb.0: +; AIX-P8-32-NEXT: lfs f0, 4(r3) +; AIX-P8-32-NEXT: lfs f1, 0(r3) +; AIX-P8-32-NEXT: lwz r6, L..C0(r2) # %const.0 +; AIX-P8-32-NEXT: lfs f2, 4(r4) +; AIX-P8-32-NEXT: xscvdpspn v2, f0 +; AIX-P8-32-NEXT: lfs f0, 0(r4) +; AIX-P8-32-NEXT: lxvw4x v0, 0, r6 +; AIX-P8-32-NEXT: xscvdpspn v3, f1 +; AIX-P8-32-NEXT: xscvdpspn v4, f2 +; AIX-P8-32-NEXT: xscvdpspn v5, f0 +; AIX-P8-32-NEXT: vperm v2, v3, v2, v0 +; AIX-P8-32-NEXT: vperm v3, v5, v4, v0 +; AIX-P8-32-NEXT: xvaddsp vs0, v2, v3 +; AIX-P8-32-NEXT: xxsldwi vs1, vs0, vs0, 1 +; AIX-P8-32-NEXT: xscvspdpn f0, vs0 +; AIX-P8-32-NEXT: xscvspdpn f1, vs1 +; AIX-P8-32-NEXT: stfs f0, 0(r5) +; AIX-P8-32-NEXT: stfs f1, 4(r5) +; AIX-P8-32-NEXT: blr +; +; AIX-P9-64-LABEL: test_f2: +; AIX-P9-64: # %bb.0: +; AIX-P9-64-NEXT: lfd f0, 0(r3) +; AIX-P9-64-NEXT: lfd f1, 0(r4) +; AIX-P9-64-NEXT: xvaddsp vs0, vs0, vs1 +; AIX-P9-64-NEXT: stfd f0, 0(r5) +; AIX-P9-64-NEXT: blr +; +; AIX-P9-32-LABEL: test_f2: +; AIX-P9-32: # %bb.0: +; AIX-P9-32-NEXT: lfs f0, 0(r3) +; AIX-P9-32-NEXT: lwz r3, 4(r3) +; AIX-P9-32-NEXT: mtfprwz f1, r3 +; AIX-P9-32-NEXT: lwz r3, 4(r4) +; AIX-P9-32-NEXT: xscvdpspn vs0, f0 +; AIX-P9-32-NEXT: mtfprwz f2, r3 +; AIX-P9-32-NEXT: xxinsertw vs0, vs1, 4 +; AIX-P9-32-NEXT: lfs f1, 0(r4) +; AIX-P9-32-NEXT: xscvdpspn vs1, f1 +; AIX-P9-32-NEXT: xxinsertw vs1, vs2, 4 +; AIX-P9-32-NEXT: xvaddsp vs0, vs0, vs1 +; AIX-P9-32-NEXT: xscvspdpn f1, vs0 +; AIX-P9-32-NEXT: xxsldwi vs0, vs0, vs0, 1 +; AIX-P9-32-NEXT: xscvspdpn f0, vs0 +; AIX-P9-32-NEXT: stfs f1, 0(r5) +; AIX-P9-32-NEXT: stfs f0, 4(r5) +; AIX-P9-32-NEXT: blr + %p = load %f2, %f2* %P + %q = load %f2, %f2* %Q + %R = fadd %f2 %p, %q + store %f2 %R, %f2* %S + ret void +} + diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll index 12c2505f2b2e..1d424446db7c 100644 --- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll +++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll @@ -14,6 +14,12 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ ; RUN: FileCheck %s --check-prefix=CHECK-P7 +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefixes=P8-AIX,P8-AIX-64 +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefixes=P8-AIX,P8-AIX-32 define dso_local <16 x i8> @testmrghb(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { ; CHECK-P8-LABEL: testmrghb: @@ -40,6 +46,11 @@ define dso_local <16 x i8> @testmrghb(<16 x i8> %a, <16 x i8> %b) local_unnamed_ ; CHECK-P7: # %bb.0: # %entry ; CHECK-P7-NEXT: vmrghb v2, v3, v2 ; CHECK-P7-NEXT: blr +; +; P8-AIX-LABEL: testmrghb: +; P8-AIX: # %bb.0: # %entry +; P8-AIX-NEXT: vmrglb v2, v2, v3 +; P8-AIX-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -75,6 +86,11 @@ define dso_local <16 x i8> @testmrghb2(<16 x i8> %a, <16 x i8> %b) local_unnamed ; CHECK-P7-NEXT: lvx v4, 0, r3 ; CHECK-P7-NEXT: vperm v2, v3, v2, v4 ; CHECK-P7-NEXT: blr +; +; P8-AIX-LABEL: testmrghb2: +; P8-AIX: # %bb.0: # %entry +; P8-AIX-NEXT: vmrglb v2, v3, v2 +; P8-AIX-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -104,6 +120,11 @@ define dso_local <16 x i8> @testmrghh(<16 x i8> %a, <16 x i8> %b) local_unnamed_ ; CHECK-P7: # %bb.0: # %entry ; CHECK-P7-NEXT: vmrghh v2, v3, v2 ; CHECK-P7-NEXT: blr +; +; P8-AIX-LABEL: testmrghh: +; P8-AIX: # %bb.0: # %entry +; P8-AIX-NEXT: vmrglh v2, v2, v3 +; P8-AIX-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -139,6 +160,11 @@ define dso_local <16 x i8> @testmrghh2(<16 x i8> %a, <16 x i8> %b) local_unnamed ; CHECK-P7-NEXT: lvx v4, 0, r3 ; CHECK-P7-NEXT: vperm v2, v3, v2, v4 ; CHECK-P7-NEXT: blr +; +; P8-AIX-LABEL: testmrghh2: +; P8-AIX: # %bb.0: # %entry +; P8-AIX-NEXT: vmrglh v2, v3, v2 +; P8-AIX-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -168,6 +194,11 @@ define dso_local <16 x i8> @testmrglb(<16 x i8> %a, <16 x i8> %b) local_unnamed_ ; CHECK-P7: # %bb.0: # %entry ; CHECK-P7-NEXT: vmrglb v2, v3, v2 ; CHECK-P7-NEXT: blr +; +; P8-AIX-LABEL: testmrglb: +; P8-AIX: # %bb.0: # %entry +; P8-AIX-NEXT: vmrghb v2, v2, v3 +; P8-AIX-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -203,6 +234,11 @@ define dso_local <16 x i8> @testmrglb2(<16 x i8> %a, <16 x i8> %b) local_unnamed ; CHECK-P7-NEXT: lvx v4, 0, r3 ; CHECK-P7-NEXT: vperm v2, v3, v2, v4 ; CHECK-P7-NEXT: blr +; +; P8-AIX-LABEL: testmrglb2: +; P8-AIX: # %bb.0: # %entry +; P8-AIX-NEXT: vmrghb v2, v3, v2 +; P8-AIX-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -232,6 +268,11 @@ define dso_local <16 x i8> @testmrglh(<16 x i8> %a, <16 x i8> %b) local_unnamed_ ; CHECK-P7: # %bb.0: # %entry ; CHECK-P7-NEXT: vmrglh v2, v3, v2 ; CHECK-P7-NEXT: blr +; +; P8-AIX-LABEL: testmrglh: +; P8-AIX: # %bb.0: # %entry +; P8-AIX-NEXT: vmrghh v2, v2, v3 +; P8-AIX-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -267,6 +308,11 @@ define dso_local <16 x i8> @testmrglh2(<16 x i8> %a, <16 x i8> %b) local_unnamed ; CHECK-P7-NEXT: lvx v4, 0, r3 ; CHECK-P7-NEXT: vperm v2, v3, v2, v4 ; CHECK-P7-NEXT: blr +; +; P8-AIX-LABEL: testmrglh2: +; P8-AIX: # %bb.0: # %entry +; P8-AIX-NEXT: vmrghh v2, v3, v2 +; P8-AIX-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -296,6 +342,11 @@ define dso_local <16 x i8> @testmrghw(<16 x i8> %a, <16 x i8> %b) local_unnamed_ ; CHECK-P7: # %bb.0: # %entry ; CHECK-P7-NEXT: xxmrghw v2, v3, v2 ; CHECK-P7-NEXT: blr +; +; P8-AIX-LABEL: testmrghw: +; P8-AIX: # %bb.0: # %entry +; P8-AIX-NEXT: xxmrglw v2, v2, v3 +; P8-AIX-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -331,6 +382,11 @@ define dso_local <16 x i8> @testmrghw2(<16 x i8> %a, <16 x i8> %b) local_unnamed ; CHECK-P7-NEXT: lvx v4, 0, r3 ; CHECK-P7-NEXT: vperm v2, v3, v2, v4 ; CHECK-P7-NEXT: blr +; +; P8-AIX-LABEL: testmrghw2: +; P8-AIX: # %bb.0: # %entry +; P8-AIX-NEXT: xxmrglw v2, v3, v2 +; P8-AIX-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -360,6 +416,11 @@ define dso_local <16 x i8> @testmrglw(<16 x i8> %a, <16 x i8> %b) local_unnamed_ ; CHECK-P7: # %bb.0: # %entry ; CHECK-P7-NEXT: xxmrglw v2, v3, v2 ; CHECK-P7-NEXT: blr +; +; P8-AIX-LABEL: testmrglw: +; P8-AIX: # %bb.0: # %entry +; P8-AIX-NEXT: xxmrghw v2, v2, v3 +; P8-AIX-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -395,6 +456,11 @@ define dso_local <16 x i8> @testmrglw2(<16 x i8> %a, <16 x i8> %b) local_unnamed ; CHECK-P7-NEXT: lvx v4, 0, r3 ; CHECK-P7-NEXT: vperm v2, v3, v2, v4 ; CHECK-P7-NEXT: blr +; +; P8-AIX-LABEL: testmrglw2: +; P8-AIX: # %bb.0: # %entry +; P8-AIX-NEXT: xxmrghw v2, v3, v2 +; P8-AIX-NEXT: blr entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %shuffle @@ -451,6 +517,32 @@ define dso_local <8 x i16> @testmrglb3(<8 x i8>* nocapture readonly %a) local_un ; CHECK-P7-NEXT: xxswapd v2, vs0 ; CHECK-P7-NEXT: vperm v2, v2, v4, v3 ; CHECK-P7-NEXT: blr +; +; P8-AIX-64-LABEL: testmrglb3: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: ld r4, L..C0(r2) # %const.0 +; P8-AIX-64-NEXT: lxsdx v2, 0, r3 +; P8-AIX-64-NEXT: xxlxor v4, v4, v4 +; P8-AIX-64-NEXT: lxvw4x v3, 0, r4 +; P8-AIX-64-NEXT: vperm v2, v4, v2, v3 +; P8-AIX-64-NEXT: blr +; +; P8-AIX-32-LABEL: testmrglb3: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: lwz r5, 4(r3) +; P8-AIX-32-NEXT: lwz r4, L..C0(r2) # %const.0 +; P8-AIX-32-NEXT: stw r5, -32(r1) +; P8-AIX-32-NEXT: lwz r3, 0(r3) +; P8-AIX-32-NEXT: lxvw4x v2, 0, r4 +; P8-AIX-32-NEXT: addi r4, r1, -16 +; P8-AIX-32-NEXT: stw r3, -16(r1) +; P8-AIX-32-NEXT: addi r3, r1, -32 +; P8-AIX-32-NEXT: lxvw4x v3, 0, r3 +; P8-AIX-32-NEXT: lxvw4x v4, 0, r4 +; P8-AIX-32-NEXT: vperm v2, v4, v3, v2 +; P8-AIX-32-NEXT: xxlxor v3, v3, v3 +; P8-AIX-32-NEXT: vmrghb v2, v3, v2 +; P8-AIX-32-NEXT: blr entry: %0 = load <8 x i8>, <8 x i8>* %a, align 8 %1 = zext <8 x i8> %0 to <8 x i16> @@ -528,6 +620,34 @@ define dso_local void @no_crash_elt0_from_RHS(<2 x double>* noalias nocapture de ; CHECK-P7-NEXT: xxmrghd vs0, vs1, vs0 ; CHECK-P7-NEXT: xxswapd vs0, vs0 ; CHECK-P7-NEXT: stxvd2x vs0, 0, r30 +; +; P8-AIX-64-LABEL: no_crash_elt0_from_RHS: +; P8-AIX-64: # %bb.0: # %test_entry +; P8-AIX-64-NEXT: mflr r0 +; P8-AIX-64-NEXT: std r0, 16(r1) +; P8-AIX-64-NEXT: stdu r1, -128(r1) +; P8-AIX-64-NEXT: std r31, 120(r1) # 8-byte Folded Spill +; P8-AIX-64-NEXT: mr r31, r3 +; P8-AIX-64-NEXT: bl .dummy[PR] +; P8-AIX-64-NEXT: nop +; P8-AIX-64-NEXT: xxlxor f0, f0, f0 +; P8-AIX-64-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-AIX-64-NEXT: xxmrghd vs0, vs0, vs1 +; P8-AIX-64-NEXT: stxvd2x vs0, 0, r31 +; +; P8-AIX-32-LABEL: no_crash_elt0_from_RHS: +; P8-AIX-32: # %bb.0: # %test_entry +; P8-AIX-32-NEXT: mflr r0 +; P8-AIX-32-NEXT: stw r0, 8(r1) +; P8-AIX-32-NEXT: stwu r1, -64(r1) +; P8-AIX-32-NEXT: stw r31, 60(r1) # 4-byte Folded Spill +; P8-AIX-32-NEXT: mr r31, r3 +; P8-AIX-32-NEXT: bl .dummy[PR] +; P8-AIX-32-NEXT: nop +; P8-AIX-32-NEXT: xxlxor f0, f0, f0 +; P8-AIX-32-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-AIX-32-NEXT: xxmrghd vs0, vs0, vs1 +; P8-AIX-32-NEXT: stxvd2x vs0, 0, r31 test_entry: %_div_result = tail call double @dummy() %oldret = insertvalue { double, double } undef, double %_div_result, 0 @@ -577,6 +697,22 @@ define dso_local <16 x i8> @no_crash_bitcast(i32 %a) { ; CHECK-P7-NEXT: lvx v2, 0, r4 ; CHECK-P7-NEXT: vperm v2, v3, v3, v2 ; CHECK-P7-NEXT: blr +; +; P8-AIX-64-LABEL: no_crash_bitcast: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: mtfprwz f0, r3 +; P8-AIX-64-NEXT: xxmrghw v2, vs0, vs0 +; P8-AIX-64-NEXT: blr +; +; P8-AIX-32-LABEL: no_crash_bitcast: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: lwz r4, L..C1(r2) # %const.0 +; P8-AIX-32-NEXT: stw r3, -16(r1) +; P8-AIX-32-NEXT: addi r3, r1, -16 +; P8-AIX-32-NEXT: lxvw4x v3, 0, r3 +; P8-AIX-32-NEXT: lxvw4x v2, 0, r4 +; P8-AIX-32-NEXT: vperm v2, v3, v3, v2 +; P8-AIX-32-NEXT: blr entry: %cast = bitcast i32 %a to <4 x i8> %ret = shufflevector <4 x i8> %cast, <4 x i8> undef, <16 x i32> @@ -632,6 +768,24 @@ define dso_local <4 x i32> @replace_undefs_in_splat(<4 x i32> %a) local_unnamed_ ; CHECK-P7-NEXT: lvx v4, 0, r3 ; CHECK-P7-NEXT: vperm v2, v4, v2, v3 ; CHECK-P7-NEXT: blr +; +; P8-AIX-64-LABEL: replace_undefs_in_splat: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: ld r3, L..C1(r2) # %const.0 +; P8-AIX-64-NEXT: ld r4, L..C2(r2) # %const.1 +; P8-AIX-64-NEXT: lxvw4x v3, 0, r3 +; P8-AIX-64-NEXT: lxvw4x v4, 0, r4 +; P8-AIX-64-NEXT: vperm v2, v2, v4, v3 +; P8-AIX-64-NEXT: blr +; +; P8-AIX-32-LABEL: replace_undefs_in_splat: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: lwz r3, L..C2(r2) # %const.0 +; P8-AIX-32-NEXT: lwz r4, L..C3(r2) # %const.1 +; P8-AIX-32-NEXT: lxvw4x v3, 0, r3 +; P8-AIX-32-NEXT: lxvw4x v4, 0, r4 +; P8-AIX-32-NEXT: vperm v2, v2, v4, v3 +; P8-AIX-32-NEXT: blr entry: %vecins1 = shufflevector <4 x i32> %a, <4 x i32> , <4 x i32> ret <4 x i32> %vecins1 @@ -683,6 +837,30 @@ define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(i32* nocapture re ; CHECK-P7-NEXT: xxspltd v2, f0, 0 ; CHECK-P7-NEXT: vmrglb v2, v3, v2 ; CHECK-P7-NEXT: blr +; +; P8-AIX-64-LABEL: no_RAUW_in_combine_during_legalize: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: sldi r4, r4, 2 +; P8-AIX-64-NEXT: xxlxor v3, v3, v3 +; P8-AIX-64-NEXT: lxsiwzx v2, r3, r4 +; P8-AIX-64-NEXT: vmrghb v2, v2, v3 +; P8-AIX-64-NEXT: blr +; +; P8-AIX-32-LABEL: no_RAUW_in_combine_during_legalize: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: slwi r4, r4, 2 +; P8-AIX-32-NEXT: xxlxor v3, v3, v3 +; P8-AIX-32-NEXT: lwzx r3, r3, r4 +; P8-AIX-32-NEXT: li r4, 0 +; P8-AIX-32-NEXT: stw r4, -32(r1) +; P8-AIX-32-NEXT: addi r4, r1, -16 +; P8-AIX-32-NEXT: stw r3, -16(r1) +; P8-AIX-32-NEXT: addi r3, r1, -32 +; P8-AIX-32-NEXT: lxvw4x vs0, 0, r3 +; P8-AIX-32-NEXT: lxvw4x vs1, 0, r4 +; P8-AIX-32-NEXT: xxmrghw v2, vs0, vs1 +; P8-AIX-32-NEXT: vmrghb v2, v2, v3 +; P8-AIX-32-NEXT: blr entry: %idx.ext = sext i32 %offset to i64 %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 %idx.ext @@ -731,6 +909,19 @@ define dso_local <4 x i32> @testSplat4Low(<8 x i8>* nocapture readonly %ptr) loc ; CHECK-P7-NEXT: xxswapd v2, vs0 ; CHECK-P7-NEXT: xxspltw v2, v2, 2 ; CHECK-P7-NEXT: blr +; +; P8-AIX-64-LABEL: testSplat4Low: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: lfdx f0, 0, r3 +; P8-AIX-64-NEXT: xxspltw v2, vs0, 1 +; P8-AIX-64-NEXT: blr +; +; P8-AIX-32-LABEL: testSplat4Low: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: addi r3, r3, 4 +; P8-AIX-32-NEXT: lfiwzx f0, 0, r3 +; P8-AIX-32-NEXT: xxspltw v2, vs0, 1 +; P8-AIX-32-NEXT: blr entry: %0 = load <8 x i8>, <8 x i8>* %ptr, align 8 %vecinit18 = shufflevector <8 x i8> %0, <8 x i8> undef, <16 x i32> @@ -774,6 +965,19 @@ define dso_local <4 x i32> @testSplat4hi(<8 x i8>* nocapture readonly %ptr) loca ; CHECK-P7-NEXT: xxswapd v2, vs0 ; CHECK-P7-NEXT: xxspltw v2, v2, 3 ; CHECK-P7-NEXT: blr +; +; P8-AIX-64-LABEL: testSplat4hi: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: lfdx f0, 0, r3 +; P8-AIX-64-NEXT: xxspltw v2, vs0, 0 +; P8-AIX-64-NEXT: blr +; +; P8-AIX-32-LABEL: testSplat4hi: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: lwz r3, 0(r3) +; P8-AIX-32-NEXT: mtfprwz f0, r3 +; P8-AIX-32-NEXT: xxspltw v2, vs0, 1 +; P8-AIX-32-NEXT: blr entry: %0 = load <8 x i8>, <8 x i8>* %ptr, align 8 %vecinit22 = shufflevector <8 x i8> %0, <8 x i8> undef, <16 x i32> @@ -814,6 +1018,27 @@ define dso_local <2 x i64> @testSplat8(<8 x i8>* nocapture readonly %ptr) local_ ; CHECK-P7: # %bb.0: # %entry ; CHECK-P7-NEXT: lxvdsx v2, 0, r3 ; CHECK-P7-NEXT: blr +; +; P8-AIX-64-LABEL: testSplat8: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: lxvdsx v2, 0, r3 +; P8-AIX-64-NEXT: blr +; +; P8-AIX-32-LABEL: testSplat8: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: lwz r5, 4(r3) +; P8-AIX-32-NEXT: lwz r4, L..C4(r2) # %const.0 +; P8-AIX-32-NEXT: stw r5, -32(r1) +; P8-AIX-32-NEXT: lwz r3, 0(r3) +; P8-AIX-32-NEXT: lxvw4x v2, 0, r4 +; P8-AIX-32-NEXT: addi r4, r1, -16 +; P8-AIX-32-NEXT: stw r3, -16(r1) +; P8-AIX-32-NEXT: addi r3, r1, -32 +; P8-AIX-32-NEXT: lxvw4x v3, 0, r3 +; P8-AIX-32-NEXT: lxvw4x v4, 0, r4 +; P8-AIX-32-NEXT: vperm v2, v4, v3, v2 +; P8-AIX-32-NEXT: xxmrghd v2, v2, v2 +; P8-AIX-32-NEXT: blr entry: %0 = load <8 x i8>, <8 x i8>* %ptr, align 8 %vecinit30 = shufflevector <8 x i8> %0, <8 x i8> undef, <16 x i32> @@ -850,6 +1075,26 @@ define <2 x i64> @testSplati64_0(<1 x i64>* nocapture readonly %ptr) #0 { ; CHECK-P7: # %bb.0: # %entry ; CHECK-P7-NEXT: lxvdsx v2, 0, r3 ; CHECK-P7-NEXT: blr +; +; P8-AIX-64-LABEL: testSplati64_0: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: lxvdsx v2, 0, r3 +; P8-AIX-64-NEXT: blr +; +; P8-AIX-32-LABEL: testSplati64_0: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: lwz r4, L..C5(r2) # %const.0 +; P8-AIX-32-NEXT: lwz r5, 4(r3) +; P8-AIX-32-NEXT: lwz r3, 0(r3) +; P8-AIX-32-NEXT: stw r5, -16(r1) +; P8-AIX-32-NEXT: stw r3, -32(r1) +; P8-AIX-32-NEXT: addi r3, r1, -16 +; P8-AIX-32-NEXT: lxvw4x v2, 0, r4 +; P8-AIX-32-NEXT: addi r4, r1, -32 +; P8-AIX-32-NEXT: lxvw4x v3, 0, r3 +; P8-AIX-32-NEXT: lxvw4x v4, 0, r4 +; P8-AIX-32-NEXT: vperm v2, v4, v3, v2 +; P8-AIX-32-NEXT: blr entry: %0 = load <1 x i64>, <1 x i64>* %ptr, align 8 %1 = shufflevector <1 x i64> %0, <1 x i64> undef, <2 x i32> @@ -894,6 +1139,12 @@ define <2 x i64> @testSplati64_1(<2 x i64>* nocapture readonly %ptr) #0 { ; CHECK-P7-NEXT: lxvd2x vs0, 0, r3 ; CHECK-P7-NEXT: xxspltd v2, vs0, 1 ; CHECK-P7-NEXT: blr +; +; P8-AIX-LABEL: testSplati64_1: +; P8-AIX: # %bb.0: # %entry +; P8-AIX-NEXT: addi r3, r3, 8 +; P8-AIX-NEXT: lxvdsx v2, 0, r3 +; P8-AIX-NEXT: blr entry: %0 = load <2 x i64>, <2 x i64>* %ptr, align 8 %1 = shufflevector <2 x i64> %0, <2 x i64> undef, <2 x i32> @@ -941,6 +1192,14 @@ define dso_local void @testByteSplat() #0 { ; CHECK-P7-NEXT: vspltb v2, v2, 15 ; CHECK-P7-NEXT: stvx v2, 0, r3 ; CHECK-P7-NEXT: blr +; +; P8-AIX-LABEL: testByteSplat: +; P8-AIX: # %bb.0: # %entry +; P8-AIX-NEXT: lbzx r3, 0, r3 +; P8-AIX-NEXT: mtvsrwz v2, r3 +; P8-AIX-NEXT: vspltb v2, v2, 7 +; P8-AIX-NEXT: stxvw4x v2, 0, r3 +; P8-AIX-NEXT: blr entry: %0 = load i8, i8* undef, align 1 %splat.splatinsert.i = insertelement <16 x i8> poison, i8 %0, i32 0 diff --git a/llvm/test/CodeGen/PowerPC/float-vector-gather.ll b/llvm/test/CodeGen/PowerPC/float-vector-gather.ll index 672a8f3e82aa..0b3e75e1c370 100644 --- a/llvm/test/CodeGen/PowerPC/float-vector-gather.ll +++ b/llvm/test/CodeGen/PowerPC/float-vector-gather.ll @@ -6,6 +6,12 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \ ; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64-unknown-linux-gnu < %s \ ; RUN: | FileCheck %s -check-prefix=CHECK-BE +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix-xcoff < %s \ +; RUN: | FileCheck %s -check-prefix=CHECK-BE +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix-xcoff < %s \ +; RUN: | FileCheck %s -check-prefix=CHECK-BE-AIX-32 define dso_local <4 x float> @vector_gatherf(float* nocapture readonly %a, float* nocapture readonly %b, float* nocapture readonly %c, float* nocapture readonly %d) { @@ -34,6 +40,21 @@ float* nocapture readonly %d) { ; CHECK-BE-DAG: xxmrghw vs[[REG4:[0-9]+]], vs[[REG2]], vs[[REG3]] ; CHECK-BE-NEXT: xxmrgld v[[REG:[0-9]+]], vs[[REG0]], vs[[REG4]] ; CHECK-BE-NEXT: blr + +; CHECK-BE-AIX-32-LABEL: vector_gatherf: +; CHECK-BE-AIX-32-LABEL: # %bb.0: # %entry +; CHECK-BE-AIX-32-DAG: lfs f[[REG0:[0-9]+]] +; CHECK-BE-AIX-32-DAG: lfs f[[REG1:[0-9]+]] +; CHECK-BE-AIX-32-DAG: lfs f[[REG2:[0-9]+]] +; CHECK-BE-AIX-32-DAG: lfs f[[REG3:[0-9]+]] +; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG0:[0-9]+]], f[[REG0]] +; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG1:[0-9]+]], f[[REG1]] +; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG2:[0-9]+]], f[[REG2]] +; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG0:[0-9]+]], f[[REG3]] +; CHECK-BE-AIX-32-DAG: vmrgow v[[VREG1]], v[[VREG0]], v[[VREG1]] +; CHECK-BE-AIX-32-DAG: vmrgow v[[VREG0]], v[[VREG2]], v[[VREG0]] +; CHECK-BE-AIX-32-NEXT: xxmrghd v[[VREG1]], v[[VREG0]], v[[VREG1]] +; CHECK-BE-AIX-32-NEXT: blr entry: %0 = load float, float* %a, align 4 %vecinit = insertelement <4 x float> undef, float %0, i32 0 diff --git a/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll b/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll index 42b449f22776..816026379224 100644 --- a/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll +++ b/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll @@ -1,9 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s \ ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names | FileCheck --check-prefix=CHECK-LE \ ; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s \ ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names | FileCheck \ ; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-ibm-aix-xcoff < %s \ +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names | FileCheck \ +; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc-ibm-aix-xcoff < %s \ +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names | FileCheck \ +; RUN: --check-prefix=CHECK-AIX-32 -implicit-check-not vmrg \ +; RUN: -implicit-check-not=vperm %s define <16 x i8> @test(i32* %s, i32* %t) { ; CHECK-LE-LABEL: test: @@ -11,12 +19,20 @@ define <16 x i8> @test(i32* %s, i32* %t) { ; CHECK-LE-NEXT: lfiwzx f0, 0, r3 ; CHECK-LE-NEXT: xxspltw v2, vs0, 1 ; CHECK-LE-NEXT: blr - +; ; CHECK-LABEL: test: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lfiwzx f0, 0, r3 ; CHECK-NEXT: xxspltw v2, vs0, 1 ; CHECK-NEXT: blr +; +; CHECK-AIX-32-LABEL: test: +; CHECK-AIX-32: # %bb.0: # %entry +; CHECK-AIX-32-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-NEXT: mtfprwz f0, r3 +; CHECK-AIX-32-NEXT: xxspltw v2, vs0, 1 +; CHECK-AIX-32-NEXT: blr + entry: %0 = bitcast i32* %s to <4 x i8>* %1 = load <4 x i8>, <4 x i8>* %0, align 4 diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll index 5ab2da486963..f3959c3c8ec9 100644 --- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll +++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll @@ -10,6 +10,10 @@ ; RUN: -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi \ ; RUN: < %s | FileCheck %s --check-prefix=P9BE +; RUN: llc -mcpu=pwr9 -O3 -verify-machineinstrs -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc-ibm-aix-xcoff -vec-extabi \ +; RUN: < %s | FileCheck %s --check-prefix=P9BE-32 + define void @test64(i8* nocapture readonly %pix2, i32 signext %i_pix2) { ; CHECK-LABEL: test64: ; CHECK-NOT: ldux @@ -21,6 +25,11 @@ define void @test64(i8* nocapture readonly %pix2, i32 signext %i_pix2) { ; P9BE-NOT: mtvsrd ; P9BE: lxsdx [[REG:[0-9]+]] ; P9BE: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG]] +; P9BE-32-LABEL: test64: +; P9BE-32: lwzux [[REG1:[0-9]+]] +; P9BE-32: mtfprwz [[REG2:[0-9]+]], [[REG1]] +; P9BE-32: xxinsertw [[REG3:[0-9]+]], [[REG2]] +; P9BE-32: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG3]] entry: %idx.ext63 = sext i32 %i_pix2 to i64 %add.ptr64 = getelementptr inbounds i8, i8* %pix2, i64 %idx.ext63 @@ -56,6 +65,10 @@ define void @test32(i8* nocapture readonly %pix2, i32 signext %i_pix2) { ; P9BE-NOT: mtvsrwz ; P9BE: lxsiwzx [[REG:[0-9]+]] ; P9BE: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG]] +; P9BE-32-LABEL: test32: +; P9BE-32: lwzx [[REG1:[0-9]+]] +; P9BE-32: mtvsrwz [[REG2:[0-9]+]], [[REG1]] +; P9BE-32: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG2]] entry: %idx.ext63 = sext i32 %i_pix2 to i64 %add.ptr64 = getelementptr inbounds i8, i8* %pix2, i64 %idx.ext63 @@ -89,6 +102,9 @@ define void @test16(i16* nocapture readonly %sums, i32 signext %delta, i32 signe ; P9BE-NOT: lhzux ; P9BE: lxsihzx [[REG:[0-9]+]] ; P9BE: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG]] +; P9BE-32-LABEL: test16: +; P9BE-32: lhzux [[REG1:[0-9]+]] +; P9BE-32: vmrghh {{[0-9]+}}, {{[0-9]+}}, [[REG1]] entry: %idxprom = sext i32 %delta to i64 %add14 = add nsw i32 %delta, 8 @@ -130,6 +146,9 @@ define void @test8(i8* nocapture readonly %sums, i32 signext %delta, i32 signext ; P9BE-NOT: lbzux ; P9BE: lxsibzx [[REG:[0-9]+]] ; P9BE: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG]] +; P9BE-32-LABEL: test8: +; P9BE-32: lxsibzx [[REG:[0-9]+]] +; P9BE-32: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG]] entry: %idxprom = sext i32 %delta to i64 %add14 = add nsw i32 %delta, 8 diff --git a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll index 5fdd21e9a0b6..d7883b540d89 100644 --- a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll +++ b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll @@ -11,6 +11,12 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \ ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P10-BE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefix=AIX-64 +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefix=AIX-32 ; Function Attrs: norecurse nounwind readonly define dso_local <2 x double> @test1(<2 x float>* nocapture readonly %Ptr) { @@ -20,6 +26,20 @@ define dso_local <2 x double> @test1(<2 x float>* nocapture readonly %Ptr) { ; CHECK-NEXT: xxmrghw vs0, vs0, vs0 ; CHECK-NEXT: xvcvspdp v2, vs0 ; CHECK-NEXT: blr +; +; AIX-64-LABEL: test1: +; AIX-64: # %bb.0: # %entry +; AIX-64-NEXT: lfdx f0, 0, r3 +; AIX-64-NEXT: xxmrghw vs0, vs0, vs0 +; AIX-64-NEXT: xvcvspdp v2, vs0 +; AIX-64-NEXT: blr +; +; AIX-32-LABEL: test1: +; AIX-32: # %bb.0: # %entry +; AIX-32-NEXT: lfs f0, 4(r3) +; AIX-32-NEXT: lfs f1, 0(r3) +; AIX-32-NEXT: xxmrghd v2, vs1, vs0 +; AIX-32-NEXT: blr entry: %0 = load <2 x float>, <2 x float>* %Ptr, align 8 %1 = fpext <2 x float> %0 to <2 x double> @@ -36,6 +56,36 @@ define dso_local <2 x double> @test2(<2 x float>* nocapture readonly %a, <2 x fl ; CHECK-NEXT: xxmrghw vs0, vs0, vs0 ; CHECK-NEXT: xvcvspdp v2, vs0 ; CHECK-NEXT: blr +; +; AIX-64-LABEL: test2: +; AIX-64: # %bb.0: # %entry +; AIX-64-NEXT: lfdx f0, 0, r3 +; AIX-64-NEXT: lfdx f1, 0, r4 +; AIX-64-NEXT: xvsubsp vs0, vs0, vs1 +; AIX-64-NEXT: xxmrghw vs0, vs0, vs0 +; AIX-64-NEXT: xvcvspdp v2, vs0 +; AIX-64-NEXT: blr +; +; AIX-32-LABEL: test2: +; AIX-32: # %bb.0: # %entry +; AIX-32-NEXT: lfs f0, 4(r3) +; AIX-32-NEXT: lfs f1, 0(r3) +; AIX-32-NEXT: lwz r5, L..C0(r2) # %const.0 +; AIX-32-NEXT: lfs f2, 4(r4) +; AIX-32-NEXT: xscvdpspn v2, f0 +; AIX-32-NEXT: lfs f0, 0(r4) +; AIX-32-NEXT: lxvw4x v0, 0, r5 +; AIX-32-NEXT: xscvdpspn v3, f1 +; AIX-32-NEXT: xscvdpspn v4, f2 +; AIX-32-NEXT: xscvdpspn v5, f0 +; AIX-32-NEXT: vperm v2, v3, v2, v0 +; AIX-32-NEXT: vperm v3, v5, v4, v0 +; AIX-32-NEXT: xvsubsp vs0, v2, v3 +; AIX-32-NEXT: xxsldwi vs1, vs0, vs0, 1 +; AIX-32-NEXT: xscvspdpn f0, vs0 +; AIX-32-NEXT: xscvspdpn f1, vs1 +; AIX-32-NEXT: xxmrghd v2, vs0, vs1 +; AIX-32-NEXT: blr entry: %0 = load <2 x float>, <2 x float>* %a, align 8 %1 = load <2 x float>, <2 x float>* %b, align 8 @@ -55,6 +105,36 @@ define dso_local <2 x double> @test3(<2 x float>* nocapture readonly %a, <2 x fl ; CHECK-NEXT: xxmrghw vs0, vs0, vs0 ; CHECK-NEXT: xvcvspdp v2, vs0 ; CHECK-NEXT: blr +; +; AIX-64-LABEL: test3: +; AIX-64: # %bb.0: # %entry +; AIX-64-NEXT: lfdx f0, 0, r3 +; AIX-64-NEXT: lfdx f1, 0, r4 +; AIX-64-NEXT: xvaddsp vs0, vs0, vs1 +; AIX-64-NEXT: xxmrghw vs0, vs0, vs0 +; AIX-64-NEXT: xvcvspdp v2, vs0 +; AIX-64-NEXT: blr +; +; AIX-32-LABEL: test3: +; AIX-32: # %bb.0: # %entry +; AIX-32-NEXT: lfs f0, 4(r3) +; AIX-32-NEXT: lfs f1, 0(r3) +; AIX-32-NEXT: lwz r5, L..C1(r2) # %const.0 +; AIX-32-NEXT: lfs f2, 4(r4) +; AIX-32-NEXT: xscvdpspn v2, f0 +; AIX-32-NEXT: lfs f0, 0(r4) +; AIX-32-NEXT: lxvw4x v0, 0, r5 +; AIX-32-NEXT: xscvdpspn v3, f1 +; AIX-32-NEXT: xscvdpspn v4, f2 +; AIX-32-NEXT: xscvdpspn v5, f0 +; AIX-32-NEXT: vperm v2, v3, v2, v0 +; AIX-32-NEXT: vperm v3, v5, v4, v0 +; AIX-32-NEXT: xvaddsp vs0, v2, v3 +; AIX-32-NEXT: xxsldwi vs1, vs0, vs0, 1 +; AIX-32-NEXT: xscvspdpn f0, vs0 +; AIX-32-NEXT: xscvspdpn f1, vs1 +; AIX-32-NEXT: xxmrghd v2, vs0, vs1 +; AIX-32-NEXT: blr entry: %0 = load <2 x float>, <2 x float>* %a, align 8 %1 = load <2 x float>, <2 x float>* %b, align 8 @@ -74,6 +154,36 @@ define dso_local <2 x double> @test4(<2 x float>* nocapture readonly %a, <2 x fl ; CHECK-NEXT: xxmrghw vs0, vs0, vs0 ; CHECK-NEXT: xvcvspdp v2, vs0 ; CHECK-NEXT: blr +; +; AIX-64-LABEL: test4: +; AIX-64: # %bb.0: # %entry +; AIX-64-NEXT: lfdx f0, 0, r3 +; AIX-64-NEXT: lfdx f1, 0, r4 +; AIX-64-NEXT: xvmulsp vs0, vs0, vs1 +; AIX-64-NEXT: xxmrghw vs0, vs0, vs0 +; AIX-64-NEXT: xvcvspdp v2, vs0 +; AIX-64-NEXT: blr +; +; AIX-32-LABEL: test4: +; AIX-32: # %bb.0: # %entry +; AIX-32-NEXT: lfs f0, 4(r3) +; AIX-32-NEXT: lfs f1, 0(r3) +; AIX-32-NEXT: lwz r5, L..C2(r2) # %const.0 +; AIX-32-NEXT: lfs f2, 4(r4) +; AIX-32-NEXT: xscvdpspn v2, f0 +; AIX-32-NEXT: lfs f0, 0(r4) +; AIX-32-NEXT: lxvw4x v0, 0, r5 +; AIX-32-NEXT: xscvdpspn v3, f1 +; AIX-32-NEXT: xscvdpspn v4, f2 +; AIX-32-NEXT: xscvdpspn v5, f0 +; AIX-32-NEXT: vperm v2, v3, v2, v0 +; AIX-32-NEXT: vperm v3, v5, v4, v0 +; AIX-32-NEXT: xvmulsp vs0, v2, v3 +; AIX-32-NEXT: xxsldwi vs1, vs0, vs0, 1 +; AIX-32-NEXT: xscvspdpn f0, vs0 +; AIX-32-NEXT: xscvspdpn f1, vs1 +; AIX-32-NEXT: xxmrghd v2, vs0, vs1 +; AIX-32-NEXT: blr entry: %0 = load <2 x float>, <2 x float>* %a, align 8 %1 = load <2 x float>, <2 x float>* %b, align 8 @@ -102,6 +212,24 @@ define dso_local <2 x double> @test5(<2 x double> %a) { ; CHECK-P10-BE-NEXT: xvcvspdp vs0, vs0 ; CHECK-P10-BE-NEXT: xvadddp v2, vs0, v2 ; CHECK-P10-BE-NEXT: blr +; +; AIX-64-LABEL: test5: +; AIX-64: # %bb.0: # %entry +; AIX-64-NEXT: ld r3, L..C0(r2) # @G +; AIX-64-NEXT: lfdx f0, 0, r3 +; AIX-64-NEXT: xxmrghw vs0, vs0, vs0 +; AIX-64-NEXT: xvcvspdp vs0, vs0 +; AIX-64-NEXT: xvadddp v2, vs0, v2 +; AIX-64-NEXT: blr +; +; AIX-32-LABEL: test5: +; AIX-32: # %bb.0: # %entry +; AIX-32-NEXT: lwz r3, L..C3(r2) # @G +; AIX-32-NEXT: lfs f0, 4(r3) +; AIX-32-NEXT: lfs f1, 0(r3) +; AIX-32-NEXT: xxmrghd vs0, vs1, vs0 +; AIX-32-NEXT: xvadddp v2, vs0, v2 +; AIX-32-NEXT: blr entry: %0 = load <2 x float>, <2 x float>* @G, align 8 %1 = fpext <2 x float> %0 to <2 x double> @@ -144,6 +272,44 @@ define dso_local i32 @test6() #0 { ; CHECK-P10-BE-NEXT: bc 4, gt, .LBB5_2 ; CHECK-P10-BE-NEXT: # %bb.1: # %bb8 ; CHECK-P10-BE-NEXT: .LBB5_2: # %bb7 +; +; AIX-64-LABEL: test6: +; AIX-64: # %bb.0: # %bb +; AIX-64-NEXT: ld r3, L..C1(r2) # @Glob1 +; AIX-64-NEXT: lis r4, 8 +; AIX-64-NEXT: xxlxor vs1, vs1, vs1 +; AIX-64-NEXT: ori r4, r4, 38248 +; AIX-64-NEXT: lfdx f0, r3, r4 +; AIX-64-NEXT: xxmrghw vs0, vs0, vs0 +; AIX-64-NEXT: xvcvspdp vs0, vs0 +; AIX-64-NEXT: xvcmpeqdp v2, vs1, vs0 +; AIX-64-NEXT: xxswapd v3, v2 +; AIX-64-NEXT: xxland vs0, v2, v3 +; AIX-64-NEXT: mffprd r3, f0 +; AIX-64-NEXT: andi. r3, r3, 1 +; AIX-64-NEXT: bc 4, gt, L..BB5_2 +; AIX-64-NEXT: # %bb.1: # %bb8 +; AIX-64-NEXT: L..BB5_2: # %bb7 +; +; AIX-32-LABEL: test6: +; AIX-32: # %bb.0: # %bb +; AIX-32-NEXT: lwz r3, L..C4(r2) # @Glob1 +; AIX-32-NEXT: lis r4, 8 +; AIX-32-NEXT: ori r4, r4, 38248 +; AIX-32-NEXT: lfsux f0, r3, r4 +; AIX-32-NEXT: lfs f1, 4(r3) +; AIX-32-NEXT: addi r3, r1, -16 +; AIX-32-NEXT: xxmrghd vs0, vs0, vs1 +; AIX-32-NEXT: xxlxor vs1, vs1, vs1 +; AIX-32-NEXT: xvcmpeqdp v2, vs1, vs0 +; AIX-32-NEXT: xxswapd v3, v2 +; AIX-32-NEXT: xxland vs0, v2, v3 +; AIX-32-NEXT: stxvw4x vs0, 0, r3 +; AIX-32-NEXT: lwz r3, -12(r1) +; AIX-32-NEXT: andi. r3, r3, 1 +; AIX-32-NEXT: bc 4, gt, L..BB5_2 +; AIX-32-NEXT: # %bb.1: # %bb8 +; AIX-32-NEXT: L..BB5_2: # %bb7 bb: br label %bb1 diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll index b4e62073ec29..e0fd7526faaf 100644 --- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll +++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll @@ -8,6 +8,19 @@ ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-ibm-aix-xcoff< %s | FileCheck %s \ +; RUN: --check-prefixes=P9-AIX,P9-AIX-64 +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefixes=P9-AIX,P9-AIX-32 +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefixes=P8-AIX-64 +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s \ +; RUN: --check-prefixes=P8-AIX-32 + ; Function Attrs: norecurse nounwind readonly define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) { ; P9LE-LABEL: s2v_test1: @@ -41,6 +54,32 @@ define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) { ; P8BE-NEXT: lxvw4x v3, 0, r4 ; P8BE-NEXT: vperm v2, v4, v2, v3 ; P8BE-NEXT: blr +; +; P9-AIX-LABEL: s2v_test1: +; P9-AIX: # %bb.0: # %entry +; P9-AIX-NEXT: lwz r3, 0(r3) +; P9-AIX-NEXT: mtfprwz f0, r3 +; P9-AIX-NEXT: xxinsertw v2, vs0, 0 +; P9-AIX-NEXT: blr +; +; P8-AIX-64-LABEL: s2v_test1: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: ld r4, L..C0(r2) # %const.0 +; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3 +; P8-AIX-64-NEXT: lxvw4x v4, 0, r4 +; P8-AIX-64-NEXT: vperm v2, v3, v2, v4 +; P8-AIX-64-NEXT: blr +; +; P8-AIX-32-LABEL: s2v_test1: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: lwz r3, 0(r3) +; P8-AIX-32-NEXT: lwz r4, L..C0(r2) # %const.0 +; P8-AIX-32-NEXT: stw r3, -16(r1) +; P8-AIX-32-NEXT: addi r3, r1, -16 +; P8-AIX-32-NEXT: lxvw4x v3, 0, r4 +; P8-AIX-32-NEXT: lxvw4x v4, 0, r3 +; P8-AIX-32-NEXT: vperm v2, v4, v2, v3 +; P8-AIX-32-NEXT: blr entry: %0 = load i32, i32* %int32, align 4 %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0 @@ -82,6 +121,33 @@ define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec) { ; P8BE-NEXT: lxvw4x v3, 0, r4 ; P8BE-NEXT: vperm v2, v4, v2, v3 ; P8BE-NEXT: blr +; +; P9-AIX-LABEL: s2v_test2: +; P9-AIX: # %bb.0: # %entry +; P9-AIX-NEXT: lwz r3, 4(r3) +; P9-AIX-NEXT: mtfprwz f0, r3 +; P9-AIX-NEXT: xxinsertw v2, vs0, 0 +; P9-AIX-NEXT: blr +; +; P8-AIX-64-LABEL: s2v_test2: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: ld r4, L..C1(r2) # %const.0 +; P8-AIX-64-NEXT: addi r3, r3, 4 +; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3 +; P8-AIX-64-NEXT: lxvw4x v4, 0, r4 +; P8-AIX-64-NEXT: vperm v2, v3, v2, v4 +; P8-AIX-64-NEXT: blr +; +; P8-AIX-32-LABEL: s2v_test2: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: lwz r3, 4(r3) +; P8-AIX-32-NEXT: lwz r4, L..C1(r2) # %const.0 +; P8-AIX-32-NEXT: stw r3, -16(r1) +; P8-AIX-32-NEXT: addi r3, r1, -16 +; P8-AIX-32-NEXT: lxvw4x v3, 0, r4 +; P8-AIX-32-NEXT: lxvw4x v4, 0, r3 +; P8-AIX-32-NEXT: vperm v2, v4, v2, v3 +; P8-AIX-32-NEXT: blr entry: %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1 %0 = load i32, i32* %arrayidx, align 4 @@ -126,6 +192,43 @@ define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32 ; P8BE-NEXT: lxvw4x v4, 0, r4 ; P8BE-NEXT: vperm v2, v3, v2, v4 ; P8BE-NEXT: blr +; +; P9-AIX-64-LABEL: s2v_test3: +; P9-AIX-64: # %bb.0: # %entry +; P9-AIX-64-NEXT: sldi r4, r4, 2 +; P9-AIX-64-NEXT: lwzx r3, r3, r4 +; P9-AIX-64-NEXT: mtfprwz f0, r3 +; P9-AIX-64-NEXT: xxinsertw v2, vs0, 0 +; P9-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: s2v_test3: +; P9-AIX-32: # %bb.0: # %entry +; P9-AIX-32-NEXT: slwi r4, r4, 2 +; P9-AIX-32-NEXT: lwzx r3, r3, r4 +; P9-AIX-32-NEXT: mtfprwz f0, r3 +; P9-AIX-32-NEXT: xxinsertw v2, vs0, 0 +; P9-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: s2v_test3: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: ld r5, L..C2(r2) # %const.0 +; P8-AIX-64-NEXT: sldi r4, r4, 2 +; P8-AIX-64-NEXT: lxsiwzx v3, r3, r4 +; P8-AIX-64-NEXT: lxvw4x v4, 0, r5 +; P8-AIX-64-NEXT: vperm v2, v3, v2, v4 +; P8-AIX-64-NEXT: blr +; +; P8-AIX-32-LABEL: s2v_test3: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: slwi r4, r4, 2 +; P8-AIX-32-NEXT: lwzx r3, r3, r4 +; P8-AIX-32-NEXT: lwz r4, L..C2(r2) # %const.0 +; P8-AIX-32-NEXT: stw r3, -16(r1) +; P8-AIX-32-NEXT: addi r3, r1, -16 +; P8-AIX-32-NEXT: lxvw4x v3, 0, r4 +; P8-AIX-32-NEXT: lxvw4x v4, 0, r3 +; P8-AIX-32-NEXT: vperm v2, v4, v2, v3 +; P8-AIX-32-NEXT: blr entry: %idxprom = sext i32 %Idx to i64 %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom @@ -169,6 +272,33 @@ define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec) { ; P8BE-NEXT: lxvw4x v3, 0, r4 ; P8BE-NEXT: vperm v2, v4, v2, v3 ; P8BE-NEXT: blr +; +; P9-AIX-LABEL: s2v_test4: +; P9-AIX: # %bb.0: # %entry +; P9-AIX-NEXT: lwz r3, 4(r3) +; P9-AIX-NEXT: mtfprwz f0, r3 +; P9-AIX-NEXT: xxinsertw v2, vs0, 0 +; P9-AIX-NEXT: blr +; +; P8-AIX-64-LABEL: s2v_test4: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: ld r4, L..C3(r2) # %const.0 +; P8-AIX-64-NEXT: addi r3, r3, 4 +; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3 +; P8-AIX-64-NEXT: lxvw4x v4, 0, r4 +; P8-AIX-64-NEXT: vperm v2, v3, v2, v4 +; P8-AIX-64-NEXT: blr +; +; P8-AIX-32-LABEL: s2v_test4: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: lwz r3, 4(r3) +; P8-AIX-32-NEXT: lwz r4, L..C3(r2) # %const.0 +; P8-AIX-32-NEXT: stw r3, -16(r1) +; P8-AIX-32-NEXT: addi r3, r1, -16 +; P8-AIX-32-NEXT: lxvw4x v3, 0, r4 +; P8-AIX-32-NEXT: lxvw4x v4, 0, r3 +; P8-AIX-32-NEXT: vperm v2, v4, v2, v3 +; P8-AIX-32-NEXT: blr entry: %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1 %0 = load i32, i32* %arrayidx, align 4 @@ -209,6 +339,32 @@ define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1) { ; P8BE-NEXT: lxvw4x v3, 0, r3 ; P8BE-NEXT: vperm v2, v4, v2, v3 ; P8BE-NEXT: blr +; +; P9-AIX-LABEL: s2v_test5: +; P9-AIX: # %bb.0: # %entry +; P9-AIX-NEXT: lwz r3, 0(r3) +; P9-AIX-NEXT: mtfprwz f0, r3 +; P9-AIX-NEXT: xxinsertw v2, vs0, 0 +; P9-AIX-NEXT: blr +; +; P8-AIX-64-LABEL: s2v_test5: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: ld r4, L..C4(r2) # %const.0 +; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3 +; P8-AIX-64-NEXT: lxvw4x v4, 0, r4 +; P8-AIX-64-NEXT: vperm v2, v3, v2, v4 +; P8-AIX-64-NEXT: blr +; +; P8-AIX-32-LABEL: s2v_test5: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: lwz r3, 0(r3) +; P8-AIX-32-NEXT: lwz r4, L..C4(r2) # %const.0 +; P8-AIX-32-NEXT: stw r3, -16(r1) +; P8-AIX-32-NEXT: addi r3, r1, -16 +; P8-AIX-32-NEXT: lxvw4x v3, 0, r4 +; P8-AIX-32-NEXT: lxvw4x v4, 0, r3 +; P8-AIX-32-NEXT: vperm v2, v4, v2, v3 +; P8-AIX-32-NEXT: blr entry: %0 = load i32, i32* %ptr1, align 4 %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0 @@ -248,6 +404,30 @@ define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec ; P8BE-NEXT: lxvw4x v3, 0, r4 ; P8BE-NEXT: vperm v2, v4, v2, v3 ; P8BE-NEXT: blr +; +; P9-AIX-LABEL: s2v_test_f1: +; P9-AIX: # %bb.0: # %entry +; P9-AIX-NEXT: lwz r3, 0(r3) +; P9-AIX-NEXT: mtfprwz f0, r3 +; P9-AIX-NEXT: xxinsertw v2, vs0, 0 +; P9-AIX-NEXT: blr +; +; P8-AIX-64-LABEL: s2v_test_f1: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: ld r4, L..C5(r2) # %const.0 +; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3 +; P8-AIX-64-NEXT: lxvw4x v4, 0, r4 +; P8-AIX-64-NEXT: vperm v2, v3, v2, v4 +; P8-AIX-64-NEXT: blr +; +; P8-AIX-32-LABEL: s2v_test_f1: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: lfs f0, 0(r3) +; P8-AIX-32-NEXT: lwz r4, L..C5(r2) # %const.0 +; P8-AIX-32-NEXT: xscvdpspn v3, f0 +; P8-AIX-32-NEXT: lxvw4x v4, 0, r4 +; P8-AIX-32-NEXT: vperm v2, v3, v2, v4 +; P8-AIX-32-NEXT: blr entry: %0 = load float, float* %f64, align 4 %vecins = insertelement <4 x float> %vec, float %0, i32 0 @@ -285,6 +465,34 @@ define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec ; P8BE-NEXT: lxsiwzx v3, 0, r3 ; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr +; +; P9-AIX-64-LABEL: s2v_test_f2: +; P9-AIX-64: # %bb.0: # %entry +; P9-AIX-64-NEXT: addi r3, r3, 4 +; P9-AIX-64-NEXT: lxsiwzx v3, 0, r3 +; P9-AIX-64-NEXT: vmrgow v2, v3, v2 +; P9-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: s2v_test_f2: +; P9-AIX-32: # %bb.0: # %entry +; P9-AIX-32-NEXT: lfs f0, 4(r3) +; P9-AIX-32-NEXT: xscvdpspn v3, f0 +; P9-AIX-32-NEXT: vmrgow v2, v3, v2 +; P9-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: s2v_test_f2: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: addi r3, r3, 4 +; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3 +; P8-AIX-64-NEXT: vmrgow v2, v3, v2 +; P8-AIX-64-NEXT: blr +; +; P8-AIX-32-LABEL: s2v_test_f2: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: lfs f0, 4(r3) +; P8-AIX-32-NEXT: xscvdpspn v3, f0 +; P8-AIX-32-NEXT: vmrgow v2, v3, v2 +; P8-AIX-32-NEXT: blr entry: %arrayidx = getelementptr inbounds float, float* %f64, i64 1 %0 = load float, float* %arrayidx, align 8 @@ -323,6 +531,36 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec ; P8BE-NEXT: lxsiwzx v3, r3, r4 ; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr +; +; P9-AIX-64-LABEL: s2v_test_f3: +; P9-AIX-64: # %bb.0: # %entry +; P9-AIX-64-NEXT: sldi r4, r4, 2 +; P9-AIX-64-NEXT: lxsiwzx v3, r3, r4 +; P9-AIX-64-NEXT: vmrgow v2, v3, v2 +; P9-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: s2v_test_f3: +; P9-AIX-32: # %bb.0: # %entry +; P9-AIX-32-NEXT: slwi r4, r4, 2 +; P9-AIX-32-NEXT: lfsx f0, r3, r4 +; P9-AIX-32-NEXT: xscvdpspn v3, f0 +; P9-AIX-32-NEXT: vmrgow v2, v3, v2 +; P9-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: s2v_test_f3: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: sldi r4, r4, 2 +; P8-AIX-64-NEXT: lxsiwzx v3, r3, r4 +; P8-AIX-64-NEXT: vmrgow v2, v3, v2 +; P8-AIX-64-NEXT: blr +; +; P8-AIX-32-LABEL: s2v_test_f3: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: slwi r4, r4, 2 +; P8-AIX-32-NEXT: lfsx f0, r3, r4 +; P8-AIX-32-NEXT: xscvdpspn v3, f0 +; P8-AIX-32-NEXT: vmrgow v2, v3, v2 +; P8-AIX-32-NEXT: blr entry: %idxprom = sext i32 %Idx to i64 %arrayidx = getelementptr inbounds float, float* %f64, i64 %idxprom @@ -362,6 +600,34 @@ define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec ; P8BE-NEXT: lxsiwzx v3, 0, r3 ; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr +; +; P9-AIX-64-LABEL: s2v_test_f4: +; P9-AIX-64: # %bb.0: # %entry +; P9-AIX-64-NEXT: addi r3, r3, 4 +; P9-AIX-64-NEXT: lxsiwzx v3, 0, r3 +; P9-AIX-64-NEXT: vmrgow v2, v3, v2 +; P9-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: s2v_test_f4: +; P9-AIX-32: # %bb.0: # %entry +; P9-AIX-32-NEXT: lfs f0, 4(r3) +; P9-AIX-32-NEXT: xscvdpspn v3, f0 +; P9-AIX-32-NEXT: vmrgow v2, v3, v2 +; P9-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: s2v_test_f4: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: addi r3, r3, 4 +; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3 +; P8-AIX-64-NEXT: vmrgow v2, v3, v2 +; P8-AIX-64-NEXT: blr +; +; P8-AIX-32-LABEL: s2v_test_f4: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: lfs f0, 4(r3) +; P8-AIX-32-NEXT: xscvdpspn v3, f0 +; P8-AIX-32-NEXT: vmrgow v2, v3, v2 +; P8-AIX-32-NEXT: blr entry: %arrayidx = getelementptr inbounds float, float* %f64, i64 1 %0 = load float, float* %arrayidx, align 8 @@ -396,6 +662,32 @@ define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr ; P8BE-NEXT: lxsiwzx v3, 0, r5 ; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr +; +; P9-AIX-64-LABEL: s2v_test_f5: +; P9-AIX-64: # %bb.0: # %entry +; P9-AIX-64-NEXT: lxsiwzx v3, 0, r3 +; P9-AIX-64-NEXT: vmrgow v2, v3, v2 +; P9-AIX-64-NEXT: blr +; +; P9-AIX-32-LABEL: s2v_test_f5: +; P9-AIX-32: # %bb.0: # %entry +; P9-AIX-32-NEXT: lfs f0, 0(r3) +; P9-AIX-32-NEXT: xscvdpspn v3, f0 +; P9-AIX-32-NEXT: vmrgow v2, v3, v2 +; P9-AIX-32-NEXT: blr +; +; P8-AIX-64-LABEL: s2v_test_f5: +; P8-AIX-64: # %bb.0: # %entry +; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3 +; P8-AIX-64-NEXT: vmrgow v2, v3, v2 +; P8-AIX-64-NEXT: blr +; +; P8-AIX-32-LABEL: s2v_test_f5: +; P8-AIX-32: # %bb.0: # %entry +; P8-AIX-32-NEXT: lfs f0, 0(r3) +; P8-AIX-32-NEXT: xscvdpspn v3, f0 +; P8-AIX-32-NEXT: vmrgow v2, v3, v2 +; P8-AIX-32-NEXT: blr entry: %0 = load float, float* %ptr1, align 8 %vecins = insertelement <2 x float> %vec, float %0, i32 0 diff --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll index 9bbb09ac3a96..225367f5a886 100644 --- a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll @@ -8,6 +8,12 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-P9 +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefixes=AIX-P8,AIX-P8-64 +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefixes=AIX-P8,AIX-P8-32 ; Byte indexed @@ -30,6 +36,24 @@ define <16 x i8> @testByte(<16 x i8> %a, i64 %b, i64 %idx) { ; CHECK-P9-NEXT: stbx r5, r4, r3 ; CHECK-P9-NEXT: lxv v2, -16(r1) ; CHECK-P9-NEXT: blr +; +; AIX-P8-64-LABEL: testByte: +; AIX-P8-64: # %bb.0: # %entry +; AIX-P8-64-NEXT: addi r5, r1, -16 +; AIX-P8-64-NEXT: clrldi r4, r4, 60 +; AIX-P8-64-NEXT: stxvw4x v2, 0, r5 +; AIX-P8-64-NEXT: stbx r3, r5, r4 +; AIX-P8-64-NEXT: lxvw4x v2, 0, r5 +; AIX-P8-64-NEXT: blr +; +; AIX-P8-32-LABEL: testByte: +; AIX-P8-32: # %bb.0: # %entry +; AIX-P8-32-NEXT: addi r3, r1, -16 +; AIX-P8-32-NEXT: clrlwi r5, r6, 28 +; AIX-P8-32-NEXT: stxvw4x v2, 0, r3 +; AIX-P8-32-NEXT: stbx r4, r3, r5 +; AIX-P8-32-NEXT: lxvw4x v2, 0, r3 +; AIX-P8-32-NEXT: blr entry: %conv = trunc i64 %b to i8 %vecins = insertelement <16 x i8> %a, i8 %conv, i64 %idx @@ -59,6 +83,24 @@ define <8 x i16> @testHalf(<8 x i16> %a, i64 %b, i64 %idx) { ; CHECK-P9-NEXT: sthx r5, r4, r3 ; CHECK-P9-NEXT: lxv v2, -16(r1) ; CHECK-P9-NEXT: blr +; +; AIX-P8-64-LABEL: testHalf: +; AIX-P8-64: # %bb.0: # %entry +; AIX-P8-64-NEXT: addi r5, r1, -16 +; AIX-P8-64-NEXT: rlwinm r4, r4, 1, 28, 30 +; AIX-P8-64-NEXT: stxvw4x v2, 0, r5 +; AIX-P8-64-NEXT: sthx r3, r5, r4 +; AIX-P8-64-NEXT: lxvw4x v2, 0, r5 +; AIX-P8-64-NEXT: blr +; +; AIX-P8-32-LABEL: testHalf: +; AIX-P8-32: # %bb.0: # %entry +; AIX-P8-32-NEXT: addi r3, r1, -16 +; AIX-P8-32-NEXT: rlwinm r5, r6, 1, 28, 30 +; AIX-P8-32-NEXT: stxvw4x v2, 0, r3 +; AIX-P8-32-NEXT: sthx r4, r3, r5 +; AIX-P8-32-NEXT: lxvw4x v2, 0, r3 +; AIX-P8-32-NEXT: blr entry: %conv = trunc i64 %b to i16 %vecins = insertelement <8 x i16> %a, i16 %conv, i64 %idx @@ -88,6 +130,24 @@ define <4 x i32> @testWord(<4 x i32> %a, i64 %b, i64 %idx) { ; CHECK-P9-NEXT: stwx r5, r4, r3 ; CHECK-P9-NEXT: lxv v2, -16(r1) ; CHECK-P9-NEXT: blr +; +; AIX-P8-64-LABEL: testWord: +; AIX-P8-64: # %bb.0: # %entry +; AIX-P8-64-NEXT: addi r5, r1, -16 +; AIX-P8-64-NEXT: rlwinm r4, r4, 2, 28, 29 +; AIX-P8-64-NEXT: stxvw4x v2, 0, r5 +; AIX-P8-64-NEXT: stwx r3, r5, r4 +; AIX-P8-64-NEXT: lxvw4x v2, 0, r5 +; AIX-P8-64-NEXT: blr +; +; AIX-P8-32-LABEL: testWord: +; AIX-P8-32: # %bb.0: # %entry +; AIX-P8-32-NEXT: addi r3, r1, -16 +; AIX-P8-32-NEXT: rlwinm r5, r6, 2, 28, 29 +; AIX-P8-32-NEXT: stxvw4x v2, 0, r3 +; AIX-P8-32-NEXT: stwx r4, r3, r5 +; AIX-P8-32-NEXT: lxvw4x v2, 0, r3 +; AIX-P8-32-NEXT: blr entry: %conv = trunc i64 %b to i32 %vecins = insertelement <4 x i32> %a, i32 %conv, i64 %idx @@ -115,6 +175,30 @@ define <4 x i32> @testWordImm(<4 x i32> %a, i64 %b) { ; CHECK-P9-NEXT: xxinsertw v2, vs0, 4 ; CHECK-P9-NEXT: xxinsertw v2, vs0, 12 ; CHECK-P9-NEXT: blr +; +; AIX-P8-64-LABEL: testWordImm: +; AIX-P8-64: # %bb.0: # %entry +; AIX-P8-64-NEXT: ld r4, L..C0(r2) # %const.0 +; AIX-P8-64-NEXT: mtvsrwz v4, r3 +; AIX-P8-64-NEXT: ld r3, L..C1(r2) # %const.1 +; AIX-P8-64-NEXT: lxvw4x v3, 0, r4 +; AIX-P8-64-NEXT: vperm v2, v2, v4, v3 +; AIX-P8-64-NEXT: lxvw4x v3, 0, r3 +; AIX-P8-64-NEXT: vperm v2, v2, v4, v3 +; AIX-P8-64-NEXT: blr +; +; AIX-P8-32-LABEL: testWordImm: +; AIX-P8-32: # %bb.0: # %entry +; AIX-P8-32-NEXT: lwz r3, L..C0(r2) # %const.0 +; AIX-P8-32-NEXT: stw r4, -16(r1) +; AIX-P8-32-NEXT: addi r4, r1, -16 +; AIX-P8-32-NEXT: lxvw4x v4, 0, r4 +; AIX-P8-32-NEXT: lxvw4x v3, 0, r3 +; AIX-P8-32-NEXT: lwz r3, L..C1(r2) # %const.1 +; AIX-P8-32-NEXT: vperm v2, v2, v4, v3 +; AIX-P8-32-NEXT: lxvw4x v3, 0, r3 +; AIX-P8-32-NEXT: vperm v2, v2, v4, v3 +; AIX-P8-32-NEXT: blr entry: %conv = trunc i64 %b to i32 %vecins = insertelement <4 x i32> %a, i32 %conv, i32 1 @@ -145,6 +229,31 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) { ; CHECK-P9-NEXT: stdx r5, r4, r3 ; CHECK-P9-NEXT: lxv v2, -16(r1) ; CHECK-P9-NEXT: blr +; +; AIX-P8-64-LABEL: testDoubleword: +; AIX-P8-64: # %bb.0: # %entry +; AIX-P8-64-NEXT: addi r5, r1, -16 +; AIX-P8-64-NEXT: rlwinm r4, r4, 3, 28, 28 +; AIX-P8-64-NEXT: stxvd2x v2, 0, r5 +; AIX-P8-64-NEXT: stdx r3, r5, r4 +; AIX-P8-64-NEXT: lxvd2x v2, 0, r5 +; AIX-P8-64-NEXT: blr +; +; AIX-P8-32-LABEL: testDoubleword: +; AIX-P8-32: # %bb.0: # %entry +; AIX-P8-32-NEXT: add r6, r6, r6 +; AIX-P8-32-NEXT: addi r5, r1, -32 +; AIX-P8-32-NEXT: rlwinm r7, r6, 2, 28, 29 +; AIX-P8-32-NEXT: stxvw4x v2, 0, r5 +; AIX-P8-32-NEXT: stwx r3, r5, r7 +; AIX-P8-32-NEXT: addi r3, r6, 1 +; AIX-P8-32-NEXT: lxvw4x vs0, 0, r5 +; AIX-P8-32-NEXT: addi r5, r1, -16 +; AIX-P8-32-NEXT: rlwinm r3, r3, 2, 28, 29 +; AIX-P8-32-NEXT: stxvw4x vs0, 0, r5 +; AIX-P8-32-NEXT: stwx r4, r5, r3 +; AIX-P8-32-NEXT: lxvw4x v2, 0, r5 +; AIX-P8-32-NEXT: blr entry: %vecins = insertelement <2 x i64> %a, i64 %b, i64 %idx ret <2 x i64> %vecins @@ -168,6 +277,28 @@ define <2 x i64> @testDoublewordImm(<2 x i64> %a, i64 %b) { ; CHECK-P9-NEXT: mtfprd f0, r5 ; CHECK-P9-NEXT: xxmrghd v2, v2, vs0 ; CHECK-P9-NEXT: blr +; +; AIX-P8-64-LABEL: testDoublewordImm: +; AIX-P8-64: # %bb.0: # %entry +; AIX-P8-64-NEXT: mtfprd f0, r3 +; AIX-P8-64-NEXT: xxmrghd v2, v2, vs0 +; AIX-P8-64-NEXT: blr +; +; AIX-P8-32-LABEL: testDoublewordImm: +; AIX-P8-32: # %bb.0: # %entry +; AIX-P8-32-NEXT: lwz r5, L..C2(r2) # %const.0 +; AIX-P8-32-NEXT: stw r3, -16(r1) +; AIX-P8-32-NEXT: stw r4, -32(r1) +; AIX-P8-32-NEXT: addi r3, r1, -16 +; AIX-P8-32-NEXT: addi r4, r1, -32 +; AIX-P8-32-NEXT: lxvw4x v4, 0, r3 +; AIX-P8-32-NEXT: lwz r3, L..C3(r2) # %const.1 +; AIX-P8-32-NEXT: lxvw4x v3, 0, r5 +; AIX-P8-32-NEXT: vperm v2, v2, v4, v3 +; AIX-P8-32-NEXT: lxvw4x v3, 0, r3 +; AIX-P8-32-NEXT: lxvw4x v4, 0, r4 +; AIX-P8-32-NEXT: vperm v2, v2, v4, v3 +; AIX-P8-32-NEXT: blr entry: %vecins = insertelement <2 x i64> %a, i64 %b, i32 1 ret <2 x i64> %vecins @@ -189,6 +320,28 @@ define <2 x i64> @testDoublewordImm2(<2 x i64> %a, i64 %b) { ; CHECK-P9-NEXT: mtfprd f0, r5 ; CHECK-P9-NEXT: xxpermdi v2, vs0, v2, 1 ; CHECK-P9-NEXT: blr +; +; AIX-P8-64-LABEL: testDoublewordImm2: +; AIX-P8-64: # %bb.0: # %entry +; AIX-P8-64-NEXT: mtfprd f0, r3 +; AIX-P8-64-NEXT: xxpermdi v2, vs0, v2, 1 +; AIX-P8-64-NEXT: blr +; +; AIX-P8-32-LABEL: testDoublewordImm2: +; AIX-P8-32: # %bb.0: # %entry +; AIX-P8-32-NEXT: lwz r5, L..C4(r2) # %const.0 +; AIX-P8-32-NEXT: stw r3, -16(r1) +; AIX-P8-32-NEXT: stw r4, -32(r1) +; AIX-P8-32-NEXT: addi r3, r1, -16 +; AIX-P8-32-NEXT: addi r4, r1, -32 +; AIX-P8-32-NEXT: lxvw4x v4, 0, r3 +; AIX-P8-32-NEXT: lwz r3, L..C5(r2) # %const.1 +; AIX-P8-32-NEXT: lxvw4x v3, 0, r5 +; AIX-P8-32-NEXT: vperm v2, v4, v2, v3 +; AIX-P8-32-NEXT: lxvw4x v3, 0, r3 +; AIX-P8-32-NEXT: lxvw4x v4, 0, r4 +; AIX-P8-32-NEXT: vperm v2, v2, v4, v3 +; AIX-P8-32-NEXT: blr entry: %vecins = insertelement <2 x i64> %a, i64 %b, i32 0 ret <2 x i64> %vecins @@ -221,6 +374,15 @@ define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) { ; CHECK-P9-NEXT: stfsx f1, r4, r3 ; CHECK-P9-NEXT: lxv v2, -16(r1) ; CHECK-P9-NEXT: blr +; +; AIX-P8-LABEL: testFloat1: +; AIX-P8: # %bb.0: # %entry +; AIX-P8-NEXT: addi r3, r1, -16 +; AIX-P8-NEXT: rlwinm r4, r4, 2, 28, 29 +; AIX-P8-NEXT: stxvw4x v2, 0, r3 +; AIX-P8-NEXT: stfsx f1, r3, r4 +; AIX-P8-NEXT: lxvw4x v2, 0, r3 +; AIX-P8-NEXT: blr entry: %vecins = insertelement <4 x float> %a, float %b, i32 %idx1 ret <4 x float> %vecins @@ -266,6 +428,38 @@ define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 ze ; CHECK-P9-NEXT: stwx r3, r5, r4 ; CHECK-P9-NEXT: lxv v2, -32(r1) ; CHECK-P9-NEXT: blr +; +; AIX-P8-64-LABEL: testFloat2: +; AIX-P8-64: # %bb.0: # %entry +; AIX-P8-64-NEXT: lwz r7, 0(r3) +; AIX-P8-64-NEXT: addi r6, r1, -32 +; AIX-P8-64-NEXT: rlwinm r4, r4, 2, 28, 29 +; AIX-P8-64-NEXT: rlwinm r5, r5, 2, 28, 29 +; AIX-P8-64-NEXT: stxvw4x v2, 0, r6 +; AIX-P8-64-NEXT: stwx r7, r6, r4 +; AIX-P8-64-NEXT: addi r4, r1, -16 +; AIX-P8-64-NEXT: lxvw4x vs0, 0, r6 +; AIX-P8-64-NEXT: lwz r3, 1(r3) +; AIX-P8-64-NEXT: stxvw4x vs0, 0, r4 +; AIX-P8-64-NEXT: stwx r3, r4, r5 +; AIX-P8-64-NEXT: lxvw4x v2, 0, r4 +; AIX-P8-64-NEXT: blr +; +; AIX-P8-32-LABEL: testFloat2: +; AIX-P8-32: # %bb.0: # %entry +; AIX-P8-32-NEXT: lwz r7, 0(r3) +; AIX-P8-32-NEXT: addi r6, r1, -32 +; AIX-P8-32-NEXT: rlwinm r4, r4, 2, 28, 29 +; AIX-P8-32-NEXT: stxvw4x v2, 0, r6 +; AIX-P8-32-NEXT: stwx r7, r6, r4 +; AIX-P8-32-NEXT: rlwinm r4, r5, 2, 28, 29 +; AIX-P8-32-NEXT: addi r5, r1, -16 +; AIX-P8-32-NEXT: lxvw4x vs0, 0, r6 +; AIX-P8-32-NEXT: lwz r3, 1(r3) +; AIX-P8-32-NEXT: stxvw4x vs0, 0, r5 +; AIX-P8-32-NEXT: stwx r3, r5, r4 +; AIX-P8-32-NEXT: lxvw4x v2, 0, r5 +; AIX-P8-32-NEXT: blr entry: %0 = bitcast i8* %b to float* %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1 @@ -324,6 +518,42 @@ define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 ze ; CHECK-P9-NEXT: stwx r3, r5, r4 ; CHECK-P9-NEXT: lxv v2, -32(r1) ; CHECK-P9-NEXT: blr +; +; AIX-P8-64-LABEL: testFloat3: +; AIX-P8-64: # %bb.0: # %entry +; AIX-P8-64-NEXT: lis r6, 1 +; AIX-P8-64-NEXT: addi r7, r1, -32 +; AIX-P8-64-NEXT: rlwinm r4, r4, 2, 28, 29 +; AIX-P8-64-NEXT: rlwinm r5, r5, 2, 28, 29 +; AIX-P8-64-NEXT: lwzx r6, r3, r6 +; AIX-P8-64-NEXT: stxvw4x v2, 0, r7 +; AIX-P8-64-NEXT: stwx r6, r7, r4 +; AIX-P8-64-NEXT: li r4, 1 +; AIX-P8-64-NEXT: lxvw4x vs0, 0, r7 +; AIX-P8-64-NEXT: rldic r4, r4, 36, 27 +; AIX-P8-64-NEXT: lwzx r3, r3, r4 +; AIX-P8-64-NEXT: addi r4, r1, -16 +; AIX-P8-64-NEXT: stxvw4x vs0, 0, r4 +; AIX-P8-64-NEXT: stwx r3, r4, r5 +; AIX-P8-64-NEXT: lxvw4x v2, 0, r4 +; AIX-P8-64-NEXT: blr +; +; AIX-P8-32-LABEL: testFloat3: +; AIX-P8-32: # %bb.0: # %entry +; AIX-P8-32-NEXT: lis r6, 1 +; AIX-P8-32-NEXT: rlwinm r4, r4, 2, 28, 29 +; AIX-P8-32-NEXT: addi r7, r1, -32 +; AIX-P8-32-NEXT: lwzx r6, r3, r6 +; AIX-P8-32-NEXT: stxvw4x v2, 0, r7 +; AIX-P8-32-NEXT: stwx r6, r7, r4 +; AIX-P8-32-NEXT: rlwinm r4, r5, 2, 28, 29 +; AIX-P8-32-NEXT: addi r5, r1, -16 +; AIX-P8-32-NEXT: lxvw4x vs0, 0, r7 +; AIX-P8-32-NEXT: lwz r3, 0(r3) +; AIX-P8-32-NEXT: stxvw4x vs0, 0, r5 +; AIX-P8-32-NEXT: stwx r3, r5, r4 +; AIX-P8-32-NEXT: lxvw4x v2, 0, r5 +; AIX-P8-32-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %b, i64 65536 %0 = bitcast i8* %add.ptr to float* @@ -359,6 +589,28 @@ define <4 x float> @testFloatImm1(<4 x float> %a, float %b) { ; CHECK-P9-NEXT: xxinsertw v2, vs0, 0 ; CHECK-P9-NEXT: xxinsertw v2, vs0, 8 ; CHECK-P9-NEXT: blr +; +; AIX-P8-64-LABEL: testFloatImm1: +; AIX-P8-64: # %bb.0: # %entry +; AIX-P8-64-NEXT: ld r3, L..C2(r2) # %const.0 +; AIX-P8-64-NEXT: xscvdpspn v3, f1 +; AIX-P8-64-NEXT: lxvw4x v4, 0, r3 +; AIX-P8-64-NEXT: ld r3, L..C3(r2) # %const.1 +; AIX-P8-64-NEXT: vperm v2, v3, v2, v4 +; AIX-P8-64-NEXT: lxvw4x v4, 0, r3 +; AIX-P8-64-NEXT: vperm v2, v2, v3, v4 +; AIX-P8-64-NEXT: blr +; +; AIX-P8-32-LABEL: testFloatImm1: +; AIX-P8-32: # %bb.0: # %entry +; AIX-P8-32-NEXT: lwz r3, L..C6(r2) # %const.0 +; AIX-P8-32-NEXT: xscvdpspn v3, f1 +; AIX-P8-32-NEXT: lxvw4x v4, 0, r3 +; AIX-P8-32-NEXT: lwz r3, L..C7(r2) # %const.1 +; AIX-P8-32-NEXT: vperm v2, v3, v2, v4 +; AIX-P8-32-NEXT: lxvw4x v4, 0, r3 +; AIX-P8-32-NEXT: vperm v2, v2, v3, v4 +; AIX-P8-32-NEXT: blr entry: %vecins = insertelement <4 x float> %a, float %b, i32 0 %vecins1 = insertelement <4 x float> %vecins, float %b, i32 2 @@ -391,6 +643,33 @@ define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) { ; CHECK-P9-NEXT: mtfprwz f0, r3 ; CHECK-P9-NEXT: xxinsertw v2, vs0, 8 ; CHECK-P9-NEXT: blr +; +; AIX-P8-64-LABEL: testFloatImm2: +; AIX-P8-64: # %bb.0: # %entry +; AIX-P8-64-NEXT: ld r4, L..C4(r2) # %const.0 +; AIX-P8-64-NEXT: lxsiwzx v3, 0, r3 +; AIX-P8-64-NEXT: li r5, 4 +; AIX-P8-64-NEXT: lxvw4x v4, 0, r4 +; AIX-P8-64-NEXT: ld r4, L..C5(r2) # %const.1 +; AIX-P8-64-NEXT: vperm v2, v3, v2, v4 +; AIX-P8-64-NEXT: lxsiwzx v3, r3, r5 +; AIX-P8-64-NEXT: lxvw4x v4, 0, r4 +; AIX-P8-64-NEXT: vperm v2, v2, v3, v4 +; AIX-P8-64-NEXT: blr +; +; AIX-P8-32-LABEL: testFloatImm2: +; AIX-P8-32: # %bb.0: # %entry +; AIX-P8-32-NEXT: lfs f0, 0(r3) +; AIX-P8-32-NEXT: lwz r4, L..C8(r2) # %const.0 +; AIX-P8-32-NEXT: xscvdpspn v3, f0 +; AIX-P8-32-NEXT: lxvw4x v4, 0, r4 +; AIX-P8-32-NEXT: lfs f0, 4(r3) +; AIX-P8-32-NEXT: lwz r3, L..C9(r2) # %const.1 +; AIX-P8-32-NEXT: vperm v2, v3, v2, v4 +; AIX-P8-32-NEXT: lxvw4x v4, 0, r3 +; AIX-P8-32-NEXT: xscvdpspn v3, f0 +; AIX-P8-32-NEXT: vperm v2, v2, v3, v4 +; AIX-P8-32-NEXT: blr entry: %0 = bitcast i32* %b to float* %add.ptr1 = getelementptr inbounds i32, i32* %b, i64 1 @@ -435,6 +714,36 @@ define <4 x float> @testFloatImm3(<4 x float> %a, i32* %b) { ; CHECK-P9-NEXT: mtfprwz f0, r3 ; CHECK-P9-NEXT: xxinsertw v2, vs0, 8 ; CHECK-P9-NEXT: blr +; +; AIX-P8-64-LABEL: testFloatImm3: +; AIX-P8-64: # %bb.0: # %entry +; AIX-P8-64-NEXT: ld r4, L..C6(r2) # %const.0 +; AIX-P8-64-NEXT: lis r5, 4 +; AIX-P8-64-NEXT: lxsiwzx v3, r3, r5 +; AIX-P8-64-NEXT: li r5, 1 +; AIX-P8-64-NEXT: rldic r5, r5, 38, 25 +; AIX-P8-64-NEXT: lxvw4x v4, 0, r4 +; AIX-P8-64-NEXT: ld r4, L..C7(r2) # %const.1 +; AIX-P8-64-NEXT: vperm v2, v3, v2, v4 +; AIX-P8-64-NEXT: lxsiwzx v3, r3, r5 +; AIX-P8-64-NEXT: lxvw4x v4, 0, r4 +; AIX-P8-64-NEXT: vperm v2, v2, v3, v4 +; AIX-P8-64-NEXT: blr +; +; AIX-P8-32-LABEL: testFloatImm3: +; AIX-P8-32: # %bb.0: # %entry +; AIX-P8-32-NEXT: lis r4, 4 +; AIX-P8-32-NEXT: lfsx f0, r3, r4 +; AIX-P8-32-NEXT: lwz r4, L..C10(r2) # %const.0 +; AIX-P8-32-NEXT: xscvdpspn v3, f0 +; AIX-P8-32-NEXT: lxvw4x v4, 0, r4 +; AIX-P8-32-NEXT: lfs f0, 0(r3) +; AIX-P8-32-NEXT: lwz r3, L..C11(r2) # %const.1 +; AIX-P8-32-NEXT: vperm v2, v3, v2, v4 +; AIX-P8-32-NEXT: lxvw4x v4, 0, r3 +; AIX-P8-32-NEXT: xscvdpspn v3, f0 +; AIX-P8-32-NEXT: vperm v2, v2, v3, v4 +; AIX-P8-32-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536 %0 = bitcast i32* %add.ptr to float* @@ -474,6 +783,24 @@ define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) ; CHECK-P9-NEXT: stfdx f1, r4, r3 ; CHECK-P9-NEXT: lxv v2, -16(r1) ; CHECK-P9-NEXT: blr +; +; AIX-P8-64-LABEL: testDouble1: +; AIX-P8-64: # %bb.0: # %entry +; AIX-P8-64-NEXT: addi r3, r1, -16 +; AIX-P8-64-NEXT: rlwinm r4, r4, 3, 28, 28 +; AIX-P8-64-NEXT: stxvd2x v2, 0, r3 +; AIX-P8-64-NEXT: stfdx f1, r3, r4 +; AIX-P8-64-NEXT: lxvd2x v2, 0, r3 +; AIX-P8-64-NEXT: blr +; +; AIX-P8-32-LABEL: testDouble1: +; AIX-P8-32: # %bb.0: # %entry +; AIX-P8-32-NEXT: addi r3, r1, -16 +; AIX-P8-32-NEXT: rlwinm r4, r5, 3, 28, 28 +; AIX-P8-32-NEXT: stxvd2x v2, 0, r3 +; AIX-P8-32-NEXT: stfdx f1, r3, r4 +; AIX-P8-32-NEXT: lxvd2x v2, 0, r3 +; AIX-P8-32-NEXT: blr entry: %vecins = insertelement <2 x double> %a, double %b, i32 %idx1 ret <2 x double> %vecins @@ -520,6 +847,39 @@ define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 ; CHECK-P9-NEXT: stdx r3, r5, r4 ; CHECK-P9-NEXT: lxv v2, -16(r1) ; CHECK-P9-NEXT: blr +; +; AIX-P8-64-LABEL: testDouble2: +; AIX-P8-64: # %bb.0: # %entry +; AIX-P8-64-NEXT: ld r7, 0(r3) +; AIX-P8-64-NEXT: addi r6, r1, -32 +; AIX-P8-64-NEXT: rlwinm r4, r4, 3, 28, 28 +; AIX-P8-64-NEXT: rlwinm r5, r5, 3, 28, 28 +; AIX-P8-64-NEXT: stxvd2x v2, 0, r6 +; AIX-P8-64-NEXT: stdx r7, r6, r4 +; AIX-P8-64-NEXT: li r4, 1 +; AIX-P8-64-NEXT: lxvd2x vs0, 0, r6 +; AIX-P8-64-NEXT: ldx r3, r3, r4 +; AIX-P8-64-NEXT: addi r4, r1, -16 +; AIX-P8-64-NEXT: stxvd2x vs0, 0, r4 +; AIX-P8-64-NEXT: stdx r3, r4, r5 +; AIX-P8-64-NEXT: lxvd2x v2, 0, r4 +; AIX-P8-64-NEXT: blr +; +; AIX-P8-32-LABEL: testDouble2: +; AIX-P8-32: # %bb.0: # %entry +; AIX-P8-32-NEXT: lfd f0, 0(r3) +; AIX-P8-32-NEXT: addi r6, r1, -32 +; AIX-P8-32-NEXT: rlwinm r4, r4, 3, 28, 28 +; AIX-P8-32-NEXT: stxvd2x v2, 0, r6 +; AIX-P8-32-NEXT: stfdx f0, r6, r4 +; AIX-P8-32-NEXT: addi r4, r1, -16 +; AIX-P8-32-NEXT: lxvd2x vs0, 0, r6 +; AIX-P8-32-NEXT: lfd f1, 1(r3) +; AIX-P8-32-NEXT: rlwinm r3, r5, 3, 28, 28 +; AIX-P8-32-NEXT: stxvd2x vs0, 0, r4 +; AIX-P8-32-NEXT: stfdx f1, r4, r3 +; AIX-P8-32-NEXT: lxvd2x v2, 0, r4 +; AIX-P8-32-NEXT: blr entry: %0 = bitcast i8* %b to double* %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1 @@ -578,6 +938,42 @@ define <2 x double> @testDouble3(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 ; CHECK-P9-NEXT: stdx r3, r5, r4 ; CHECK-P9-NEXT: lxv v2, -16(r1) ; CHECK-P9-NEXT: blr +; +; AIX-P8-64-LABEL: testDouble3: +; AIX-P8-64: # %bb.0: # %entry +; AIX-P8-64-NEXT: lis r6, 1 +; AIX-P8-64-NEXT: addi r7, r1, -32 +; AIX-P8-64-NEXT: rlwinm r4, r4, 3, 28, 28 +; AIX-P8-64-NEXT: li r8, 1 +; AIX-P8-64-NEXT: rlwinm r5, r5, 3, 28, 28 +; AIX-P8-64-NEXT: ldx r6, r3, r6 +; AIX-P8-64-NEXT: stxvd2x v2, 0, r7 +; AIX-P8-64-NEXT: stdx r6, r7, r4 +; AIX-P8-64-NEXT: rldic r4, r8, 36, 27 +; AIX-P8-64-NEXT: lxvd2x vs0, 0, r7 +; AIX-P8-64-NEXT: ldx r3, r3, r4 +; AIX-P8-64-NEXT: addi r4, r1, -16 +; AIX-P8-64-NEXT: stxvd2x vs0, 0, r4 +; AIX-P8-64-NEXT: stdx r3, r4, r5 +; AIX-P8-64-NEXT: lxvd2x v2, 0, r4 +; AIX-P8-64-NEXT: blr +; +; AIX-P8-32-LABEL: testDouble3: +; AIX-P8-32: # %bb.0: # %entry +; AIX-P8-32-NEXT: lis r6, 1 +; AIX-P8-32-NEXT: rlwinm r4, r4, 3, 28, 28 +; AIX-P8-32-NEXT: lfdx f0, r3, r6 +; AIX-P8-32-NEXT: addi r6, r1, -32 +; AIX-P8-32-NEXT: stxvd2x v2, 0, r6 +; AIX-P8-32-NEXT: stfdx f0, r6, r4 +; AIX-P8-32-NEXT: addi r4, r1, -16 +; AIX-P8-32-NEXT: lxvd2x vs0, 0, r6 +; AIX-P8-32-NEXT: lfd f1, 0(r3) +; AIX-P8-32-NEXT: rlwinm r3, r5, 3, 28, 28 +; AIX-P8-32-NEXT: stxvd2x vs0, 0, r4 +; AIX-P8-32-NEXT: stfdx f1, r4, r3 +; AIX-P8-32-NEXT: lxvd2x v2, 0, r4 +; AIX-P8-32-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %b, i64 65536 %0 = bitcast i8* %add.ptr to double* @@ -610,6 +1006,12 @@ define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) { ; CHECK-P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-P9-NEXT: xxpermdi v2, vs1, v2, 1 ; CHECK-P9-NEXT: blr +; +; AIX-P8-LABEL: testDoubleImm1: +; AIX-P8: # %bb.0: # %entry +; AIX-P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; AIX-P8-NEXT: xxpermdi v2, vs1, v2, 1 +; AIX-P8-NEXT: blr entry: %vecins = insertelement <2 x double> %a, double %b, i32 0 ret <2 x double> %vecins @@ -633,6 +1035,12 @@ define <2 x double> @testDoubleImm2(<2 x double> %a, i32* %b) { ; CHECK-P9-NEXT: lfd f0, 0(r5) ; CHECK-P9-NEXT: xxpermdi v2, vs0, v2, 1 ; CHECK-P9-NEXT: blr +; +; AIX-P8-LABEL: testDoubleImm2: +; AIX-P8: # %bb.0: # %entry +; AIX-P8-NEXT: lfd f0, 0(r3) +; AIX-P8-NEXT: xxpermdi v2, vs0, v2, 1 +; AIX-P8-NEXT: blr entry: %0 = bitcast i32* %b to double* %1 = load double, double* %0, align 8 @@ -658,6 +1066,12 @@ define <2 x double> @testDoubleImm3(<2 x double> %a, i32* %b) { ; CHECK-P9-NEXT: lfd f0, 4(r5) ; CHECK-P9-NEXT: xxpermdi v2, vs0, v2, 1 ; CHECK-P9-NEXT: blr +; +; AIX-P8-LABEL: testDoubleImm3: +; AIX-P8: # %bb.0: # %entry +; AIX-P8-NEXT: lfd f0, 4(r3) +; AIX-P8-NEXT: xxpermdi v2, vs0, v2, 1 +; AIX-P8-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %b, i64 1 %0 = bitcast i32* %add.ptr to double* @@ -685,6 +1099,13 @@ define <2 x double> @testDoubleImm4(<2 x double> %a, i32* %b) { ; CHECK-P9-NEXT: lfdx f0, r5, r3 ; CHECK-P9-NEXT: xxpermdi v2, vs0, v2, 1 ; CHECK-P9-NEXT: blr +; +; AIX-P8-LABEL: testDoubleImm4: +; AIX-P8: # %bb.0: # %entry +; AIX-P8-NEXT: lis r4, 4 +; AIX-P8-NEXT: lfdx f0, r3, r4 +; AIX-P8-NEXT: xxpermdi v2, vs0, v2, 1 +; AIX-P8-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536 %0 = bitcast i32* %add.ptr to double* @@ -717,6 +1138,20 @@ define <2 x double> @testDoubleImm5(<2 x double> %a, i32* %b) { ; CHECK-P9-NEXT: lfdx f0, r5, r3 ; CHECK-P9-NEXT: xxpermdi v2, vs0, v2, 1 ; CHECK-P9-NEXT: blr +; +; AIX-P8-64-LABEL: testDoubleImm5: +; AIX-P8-64: # %bb.0: # %entry +; AIX-P8-64-NEXT: li r4, 1 +; AIX-P8-64-NEXT: rldic r4, r4, 38, 25 +; AIX-P8-64-NEXT: lfdx f0, r3, r4 +; AIX-P8-64-NEXT: xxpermdi v2, vs0, v2, 1 +; AIX-P8-64-NEXT: blr +; +; AIX-P8-32-LABEL: testDoubleImm5: +; AIX-P8-32: # %bb.0: # %entry +; AIX-P8-32-NEXT: lfd f0, 0(r3) +; AIX-P8-32-NEXT: xxpermdi v2, vs0, v2, 1 +; AIX-P8-32-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %b, i64 68719476736 %0 = bitcast i32* %add.ptr to double* @@ -743,6 +1178,24 @@ define dso_local <4 x float> @testInsertDoubleToFloat(<4 x float> %a, double %b) ; CHECK-P9-NEXT: xscvdpsp f0, f1 ; CHECK-P9-NEXT: xxinsertw v2, vs0, 4 ; CHECK-P9-NEXT: blr +; +; AIX-P8-64-LABEL: testInsertDoubleToFloat: +; AIX-P8-64: # %bb.0: # %entry +; AIX-P8-64-NEXT: xsrsp f0, f1 +; AIX-P8-64-NEXT: ld r3, L..C8(r2) # %const.0 +; AIX-P8-64-NEXT: lxvw4x v4, 0, r3 +; AIX-P8-64-NEXT: xscvdpspn v3, f0 +; AIX-P8-64-NEXT: vperm v2, v2, v3, v4 +; AIX-P8-64-NEXT: blr +; +; AIX-P8-32-LABEL: testInsertDoubleToFloat: +; AIX-P8-32: # %bb.0: # %entry +; AIX-P8-32-NEXT: xsrsp f0, f1 +; AIX-P8-32-NEXT: lwz r3, L..C12(r2) # %const.0 +; AIX-P8-32-NEXT: lxvw4x v4, 0, r3 +; AIX-P8-32-NEXT: xscvdpspn v3, f0 +; AIX-P8-32-NEXT: vperm v2, v2, v3, v4 +; AIX-P8-32-NEXT: blr entry: %conv = fptrunc double %b to float %vecins = insertelement <4 x float> %a, float %conv, i32 1