[NFC][PowerPC] Added test to check register allocation for ACC registers

ACC registers are a combination of 4 consecutive vector registers and therefore
sometimes require special treatment for register allocation. This patch only
adds a test.
This commit is contained in:
Stefan Pintilie 2021-07-13 21:15:30 -05:00
parent 810e4c3c66
commit cf0aa0b66c
1 changed files with 352 additions and 0 deletions

View File

@ -0,0 +1,352 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \
; RUN: | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -ppc-track-subreg-liveness < %s | FileCheck %s --check-prefix=TRACKLIVE
; Two identical packed struct types holding a single double. They serve as the
; element types of the zero-length array parameters of @acc_regalloc.
%0 = type <{ double }>
%1 = type <{ double }>
; Register-allocation test for PowerPC MMA accumulators. The function builds
; four <512 x i1> accumulators with llvm.ppc.mma.assemble.acc, updates each of
; them through chains of llvm.ppc.mma.xvf64gerpp calls spread over two nested
; loops, then disassembles them and stores one 16-byte piece of each.
; The function contains no ret instruction: %bb95 always branches back to %bb9.
; Many addresses and operands are undef or null, so only the generated code
; shape is meaningful here, not the computed values.
define void @acc_regalloc(i32* %arg, [0 x %0]* %arg1, [0 x %1]* %arg2) local_unnamed_addr {
; Expected assembly for the llc invocation without -ppc-track-subreg-liveness.
; CHECK-LABEL: acc_regalloc:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: lwz r3, 0(r3)
; CHECK-NEXT: lxv vs0, 0(0)
; CHECK-NEXT: xxlxor vs2, vs2, vs2
; CHECK-NEXT: xxlxor vs3, vs3, vs3
; CHECK-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill
; CHECK-NEXT: xxlxor v2, v2, v2
; CHECK-NEXT: li r6, 1
; CHECK-NEXT: li r4, 16
; CHECK-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill
; CHECK-NEXT: extswsli r3, r3, 3
; CHECK-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f19, -104(r1) # 8-byte Folded Spill
; CHECK-NEXT: xvmaddadp vs3, vs0, vs3
; CHECK-NEXT: lxvdsx vs1, 0, r3
; CHECK-NEXT: xvmaddadp vs2, vs1, vs2
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: # %bb9
; CHECK-NEXT: #
; CHECK-NEXT: addi r6, r6, 2
; CHECK-NEXT: lxv vs5, -64(r5)
; CHECK-NEXT: lxv vs6, -16(r5)
; CHECK-NEXT: lxv vs4, 16(0)
; CHECK-NEXT: xxlor v7, vs2, vs2
; CHECK-NEXT: xxlxor v8, v8, v8
; CHECK-NEXT: xxlxor v1, v1, v1
; CHECK-NEXT: mulld r6, r6, r3
; CHECK-NEXT: xvmaddadp v7, vs5, v2
; CHECK-NEXT: xxlxor v6, v6, v6
; CHECK-NEXT: xvmaddadp v8, vs6, v8
; CHECK-NEXT: xvmaddadp v1, vs4, vs1
; CHECK-NEXT: xvmuldp v0, vs4, v2
; CHECK-NEXT: xvmaddadp v1, v2, v2
; CHECK-NEXT: xvmaddadp v0, v2, v2
; CHECK-NEXT: lxvdsx v4, r6, r4
; CHECK-NEXT: xvmaddadp v6, vs5, v6
; CHECK-NEXT: li r6, 0
; CHECK-NEXT: xvmuldp v9, vs6, v4
; CHECK-NEXT: xvmuldp v3, vs5, v4
; CHECK-NEXT: xvmuldp v11, vs0, v4
; CHECK-NEXT: vmr v10, v2
; CHECK-NEXT: xvmuldp v5, v4, v2
; CHECK-NEXT: vmr v4, v2
; CHECK-NEXT: xxlor vs18, v8, v8
; CHECK-NEXT: xxlor vs4, v2, v2
; CHECK-NEXT: xxlor vs12, v10, v10
; CHECK-NEXT: xxlor vs13, v11, v11
; CHECK-NEXT: xxlor v10, vs3, vs3
; CHECK-NEXT: xxlor vs8, v4, v4
; CHECK-NEXT: xxlor vs9, v5, v5
; CHECK-NEXT: xxlor vs10, v0, v0
; CHECK-NEXT: xxlor vs11, v1, v1
; CHECK-NEXT: xxmtacc acc2
; CHECK-NEXT: xxlor vs19, v9, v9
; CHECK-NEXT: vmr v8, v2
; CHECK-NEXT: xxlor vs5, v3, v3
; CHECK-NEXT: xxlor vs6, v6, v6
; CHECK-NEXT: xxlor vs7, v7, v7
; CHECK-NEXT: xxlor vs14, v10, v10
; CHECK-NEXT: xxlor vs15, v11, v11
; CHECK-NEXT: xxlor vs16, v8, v8
; CHECK-NEXT: xxlor vs17, v9, v9
; CHECK-NEXT: xxmtacc acc1
; CHECK-NEXT: xxmtacc acc3
; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT: xxmtacc acc4
; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc4, vsp34, vs0
; CHECK-NEXT: xxmfacc acc1
; CHECK-NEXT: xxmfacc acc2
; CHECK-NEXT: xxmfacc acc3
; CHECK-NEXT: xxmfacc acc4
; CHECK-NEXT: stxv vs5, 0(r3)
; CHECK-NEXT: stxv vs13, 32(r3)
; CHECK-NEXT: stxv vs8, 16(0)
; CHECK-NEXT: stxv vs16, 48(0)
; CHECK-NEXT: b .LBB0_1
;
; Expected assembly for the llc invocation with -ppc-track-subreg-liveness.
; TRACKLIVE-LABEL: acc_regalloc:
; TRACKLIVE: # %bb.0: # %bb
; TRACKLIVE-NEXT: lwz r3, 0(r3)
; TRACKLIVE-NEXT: lxv vs0, 0(0)
; TRACKLIVE-NEXT: xxlxor vs2, vs2, vs2
; TRACKLIVE-NEXT: xxlxor vs3, vs3, vs3
; TRACKLIVE-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill
; TRACKLIVE-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill
; TRACKLIVE-NEXT: xxlxor v2, v2, v2
; TRACKLIVE-NEXT: li r6, 1
; TRACKLIVE-NEXT: li r4, 16
; TRACKLIVE-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill
; TRACKLIVE-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill
; TRACKLIVE-NEXT: extswsli r3, r3, 3
; TRACKLIVE-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill
; TRACKLIVE-NEXT: stfd f19, -104(r1) # 8-byte Folded Spill
; TRACKLIVE-NEXT: xvmaddadp vs3, vs0, vs3
; TRACKLIVE-NEXT: lxvdsx vs1, 0, r3
; TRACKLIVE-NEXT: xvmaddadp vs2, vs1, vs2
; TRACKLIVE-NEXT: .p2align 4
; TRACKLIVE-NEXT: .LBB0_1: # %bb9
; TRACKLIVE-NEXT: #
; TRACKLIVE-NEXT: addi r6, r6, 2
; TRACKLIVE-NEXT: lxv vs4, 16(0)
; TRACKLIVE-NEXT: xxlxor v1, v1, v1
; TRACKLIVE-NEXT: lxv vs6, -16(r5)
; TRACKLIVE-NEXT: lxv vs5, -64(r5)
; TRACKLIVE-NEXT: xxlxor v8, v8, v8
; TRACKLIVE-NEXT: xxlor v7, vs2, vs2
; TRACKLIVE-NEXT: xxlxor v6, v6, v6
; TRACKLIVE-NEXT: mulld r6, r6, r3
; TRACKLIVE-NEXT: vmr v10, v2
; TRACKLIVE-NEXT: xxlor vs8, v10, v10
; TRACKLIVE-NEXT: xvmaddadp v1, vs4, vs1
; TRACKLIVE-NEXT: xvmuldp v0, vs4, v2
; TRACKLIVE-NEXT: xvmaddadp v8, vs6, v8
; TRACKLIVE-NEXT: xvmaddadp v7, vs5, v2
; TRACKLIVE-NEXT: xvmaddadp v6, vs5, v6
; TRACKLIVE-NEXT: xxlor vs4, v2, v2
; TRACKLIVE-NEXT: lxvdsx v4, r6, r4
; TRACKLIVE-NEXT: li r6, 0
; TRACKLIVE-NEXT: xvmaddadp v1, v2, v2
; TRACKLIVE-NEXT: xvmaddadp v0, v2, v2
; TRACKLIVE-NEXT: xxlor vs18, v8, v8
; TRACKLIVE-NEXT: vmr v8, v2
; TRACKLIVE-NEXT: xxlor vs7, v7, v7
; TRACKLIVE-NEXT: xxlor vs16, v8, v8
; TRACKLIVE-NEXT: xvmuldp v3, vs5, v4
; TRACKLIVE-NEXT: xvmuldp v5, vs0, v4
; TRACKLIVE-NEXT: xvmuldp v9, vs6, v4
; TRACKLIVE-NEXT: xvmuldp v11, v4, v2
; TRACKLIVE-NEXT: vmr v4, v2
; TRACKLIVE-NEXT: xxlor vs6, v6, v6
; TRACKLIVE-NEXT: xxlor vs12, v4, v4
; TRACKLIVE-NEXT: xxlor v4, vs3, vs3
; TRACKLIVE-NEXT: xxlor vs10, v0, v0
; TRACKLIVE-NEXT: xxlor vs11, v1, v1
; TRACKLIVE-NEXT: xxlor vs14, v4, v4
; TRACKLIVE-NEXT: xxlor vs5, v3, v3
; TRACKLIVE-NEXT: xxlor vs9, v11, v11
; TRACKLIVE-NEXT: xxlor vs13, v5, v5
; TRACKLIVE-NEXT: xxlor vs15, v5, v5
; TRACKLIVE-NEXT: xxlor vs19, v9, v9
; TRACKLIVE-NEXT: xxlor vs17, v9, v9
; TRACKLIVE-NEXT: xxmtacc acc1
; TRACKLIVE-NEXT: xxmtacc acc2
; TRACKLIVE-NEXT: xxmtacc acc3
; TRACKLIVE-NEXT: xxmtacc acc4
; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc4, vsp34, vs0
; TRACKLIVE-NEXT: xxmfacc acc1
; TRACKLIVE-NEXT: xxmfacc acc2
; TRACKLIVE-NEXT: xxmfacc acc3
; TRACKLIVE-NEXT: xxmfacc acc4
; TRACKLIVE-NEXT: stxv vs5, 0(r3)
; TRACKLIVE-NEXT: stxv vs13, 32(r3)
; TRACKLIVE-NEXT: stxv vs8, 16(0)
; TRACKLIVE-NEXT: stxv vs16, 48(0)
; TRACKLIVE-NEXT: b .LBB0_1
; Entry block: load the i32 at %arg, sign-extend it and scale it by 8 (shl 3),
; then form the base pointers used later (%i7 by %bb9, %i8 by %bb95).
; Several of the GEP offsets are undef.
bb:
%i = load i32, i32* %arg, align 4
%i3 = sext i32 %i to i64
%i4 = shl nsw i64 %i3, 3
%i5 = bitcast [0 x %0]* %arg1 to i8*
%i6 = getelementptr i8, i8* %i5, i64 undef
%i7 = getelementptr [0 x %1], [0 x %1]* %arg2, i64 0, i64 -8
%i8 = getelementptr i8, i8* %i6, i64 undef
br label %bb9
; Outer loop header. %i10 is 1 when entered from %bb and 0 when re-entered
; from %bb95.
bb9: ; preds = %bb95, %bb
%i10 = phi i64 [ 1, %bb ], [ 0, %bb95 ]
; Four <2 x double> loads (%i13, %i17, %i18, %i21) that feed the fma/fmul
; values below.
%i11 = getelementptr %1, %1* null, i64 2
%i12 = bitcast %1* %i11 to <2 x double>*
%i13 = load <2 x double>, <2 x double>* %i12, align 1
%i14 = add nuw nsw i64 %i10, 2
%i15 = getelementptr inbounds %1, %1* %i7, i64 undef
%i16 = bitcast %1* %i15 to <2 x double>*
%i17 = load <2 x double>, <2 x double>* %i16, align 1
%i18 = load <2 x double>, <2 x double>* null, align 1
%i19 = getelementptr %1, %1* %i15, i64 6
%i20 = bitcast %1* %i19 to <2 x double>*
%i21 = load <2 x double>, <2 x double>* %i20, align 1
; Splat: load an i64, reinterpret it as a double lane and broadcast lane 0 to
; both lanes (shufflevector with a zeroinitializer mask).
%i22 = load i64, i64* undef, align 8
%i23 = insertelement <2 x i64> poison, i64 %i22, i32 0
%i24 = bitcast <2 x i64> %i23 to <2 x double>
%i25 = shufflevector <2 x double> %i24, <2 x double> undef, <2 x i32> zeroinitializer
; Second splat, loaded from byte offset (%i10 + 2) * %i4 + 16 relative to null.
%i26 = mul i64 %i14, %i4
%i27 = getelementptr i8, i8* null, i64 %i26
%i28 = getelementptr inbounds i8, i8* %i27, i64 0
%i29 = getelementptr i8, i8* %i28, i64 16
%i30 = bitcast i8* %i29 to i64*
%i31 = load i64, i64* %i30, align 8
%i32 = insertelement <2 x i64> poison, i64 %i31, i32 0
%i33 = bitcast <2 x i64> %i32 to <2 x double>
%i34 = shufflevector <2 x double> %i33, <2 x double> undef, <2 x i32> zeroinitializer
; fma/fmul results; after a bitcast to <16 x i8> these become the 16-byte
; operands of the accumulator assembles below.
%i35 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> zeroinitializer, <2 x double> %i25, <2 x double> zeroinitializer)
%i36 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i13, <2 x double> %i25, <2 x double> zeroinitializer)
%i37 = fmul contract <2 x double> %i13, zeroinitializer
%i38 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i17, <2 x double> zeroinitializer, <2 x double> %i35)
%i39 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, <2 x double> %i36)
%i40 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i17, <2 x double> zeroinitializer, <2 x double> zeroinitializer)
%i41 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, <2 x double> %i37)
%i42 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i18, <2 x double> zeroinitializer, <2 x double> zeroinitializer)
%i43 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i21, <2 x double> zeroinitializer, <2 x double> zeroinitializer)
%i44 = fmul contract <2 x double> %i17, %i34
%i45 = fmul contract <2 x double> zeroinitializer, %i34
%i46 = fmul contract <2 x double> %i18, %i34
%i47 = fmul contract <2 x double> %i21, %i34
; Assemble the four accumulators (%i51, %i55, %i58, %i61). The first operand of
; each is zeroinitializer; %i58 and %i61 pass the same vector (%i56 / %i59) as
; both their second and fourth operands.
%i48 = bitcast <2 x double> %i44 to <16 x i8>
%i49 = bitcast <2 x double> %i40 to <16 x i8>
%i50 = bitcast <2 x double> %i38 to <16 x i8>
%i51 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> %i48, <16 x i8> %i49, <16 x i8> %i50)
%i52 = bitcast <2 x double> %i45 to <16 x i8>
%i53 = bitcast <2 x double> %i41 to <16 x i8>
%i54 = bitcast <2 x double> %i39 to <16 x i8>
%i55 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> %i52, <16 x i8> %i53, <16 x i8> %i54)
%i56 = bitcast <2 x double> %i46 to <16 x i8>
%i57 = bitcast <2 x double> %i42 to <16 x i8>
%i58 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> %i56, <16 x i8> %i57, <16 x i8> %i56)
%i59 = bitcast <2 x double> %i47 to <16 x i8>
%i60 = bitcast <2 x double> %i43 to <16 x i8>
%i61 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> %i59, <16 x i8> %i60, <16 x i8> %i59)
; Five xvf64gerpp updates per accumulator, interleaved round-robin across the
; four chains so that all four accumulators are live at the same time. Every
; <256 x i1> and <16 x i8> operand is undef.
%i62 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i51, <256 x i1> undef, <16 x i8> undef)
%i63 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i55, <256 x i1> undef, <16 x i8> undef)
%i64 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i58, <256 x i1> undef, <16 x i8> undef)
%i65 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i61, <256 x i1> undef, <16 x i8> undef)
%i66 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i62, <256 x i1> undef, <16 x i8> undef)
%i67 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i63, <256 x i1> undef, <16 x i8> undef)
%i68 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i64, <256 x i1> undef, <16 x i8> undef)
%i69 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i65, <256 x i1> undef, <16 x i8> undef)
%i70 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i66, <256 x i1> undef, <16 x i8> undef)
%i71 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i67, <256 x i1> undef, <16 x i8> undef)
%i72 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i68, <256 x i1> undef, <16 x i8> undef)
%i73 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i69, <256 x i1> undef, <16 x i8> undef)
%i74 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i70, <256 x i1> undef, <16 x i8> undef)
%i75 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i71, <256 x i1> undef, <16 x i8> undef)
%i76 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i72, <256 x i1> undef, <16 x i8> undef)
%i77 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i73, <256 x i1> undef, <16 x i8> undef)
%i78 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i74, <256 x i1> undef, <16 x i8> undef)
%i79 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i75, <256 x i1> undef, <16 x i8> undef)
%i80 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i76, <256 x i1> undef, <16 x i8> undef)
%i81 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i77, <256 x i1> undef, <16 x i8> undef)
br label %bb82
; Inner loop: the four accumulators are carried in phis and each receives two
; more xvf64gerpp updates per iteration. The exit condition is undef.
bb82: ; preds = %bb82, %bb9
%i83 = phi <512 x i1> [ %i94, %bb82 ], [ %i81, %bb9 ]
%i84 = phi <512 x i1> [ %i93, %bb82 ], [ %i80, %bb9 ]
%i85 = phi <512 x i1> [ %i92, %bb82 ], [ %i79, %bb9 ]
%i86 = phi <512 x i1> [ %i91, %bb82 ], [ %i78, %bb9 ]
%i87 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i86, <256 x i1> undef, <16 x i8> undef)
%i88 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i85, <256 x i1> undef, <16 x i8> undef)
%i89 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i84, <256 x i1> undef, <16 x i8> undef)
%i90 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i83, <256 x i1> undef, <16 x i8> undef)
%i91 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i87, <256 x i1> undef, <16 x i8> undef)
%i92 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i88, <256 x i1> undef, <16 x i8> undef)
%i93 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i89, <256 x i1> undef, <16 x i8> undef)
%i94 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i90, <256 x i1> undef, <16 x i8> undef)
br i1 undef, label %bb95, label %bb82
; Inner-loop exit: disassemble each accumulator, keep one element of each
; result (index 2 or 3), store the four vectors, then restart the outer loop.
bb95: ; preds = %bb82
%i96 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %i91)
%i97 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %i96, 2
%i98 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %i92)
%i99 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %i98, 3
%i100 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %i93)
%i101 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %i100, 2
%i102 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %i94)
%i103 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %i102, 3
; %i97 and %i101 are stored at %i104 and %i104 + 32; %i99 and %i103 are stored
; at byte offsets 16 and 48 from null.
%i104 = getelementptr inbounds i8, i8* %i8, i64 undef
%i105 = bitcast i8* %i104 to <16 x i8>*
store <16 x i8> %i97, <16 x i8>* %i105, align 1
%i106 = getelementptr i8, i8* %i104, i64 32
%i107 = bitcast i8* %i106 to <16 x i8>*
store <16 x i8> %i101, <16 x i8>* %i107, align 1
%i108 = getelementptr i8, i8* null, i64 16
%i109 = bitcast i8* %i108 to <16 x i8>*
store <16 x i8> %i99, <16 x i8>* %i109, align 1
%i110 = getelementptr i8, i8* null, i64 48
%i111 = bitcast i8* %i110 to <16 x i8>*
store <16 x i8> %i103, <16 x i8>* %i111, align 1
br label %bb9
}
; Intrinsics called by @acc_regalloc: the generic vector fma plus the PowerPC
; MMA intrinsics that assemble, update (xvf64gerpp) and disassemble a
; <512 x i1> accumulator value.
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
declare <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1>, <256 x i1>, <16 x i8>)
declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>)