From f037b07b5c2e6d86124d23d16fca66c16a639d3d Mon Sep 17 00:00:00 2001 From: Andrew Savonichev Date: Mon, 12 Apr 2021 16:28:49 +0300 Subject: [PATCH] Revert "[AArch64] Add Machine InstCombiner patterns for FMUL indexed variant" This reverts commit cca9b5985c0c7e3c34da7f2db7cc8e7e707b0e2e. Buildbot reported an error for CodeGen/AArch64/machine-combiner-fmul-dup.mir: *** Bad machine code: Virtual register killed in block, but needed live out. *** - function: indexed_2s - basic block: %bb.0 entry (0x640fee8) Virtual register %7 is used after the block. *** Bad machine code: Virtual register defs don't dominate all uses. *** - function: indexed_2s - v. register: %7 LLVM ERROR: Found 2 machine code errors. --- .../llvm/CodeGen/MachineCombinerPattern.h | 13 +- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 132 +----- .../CodeGen/AArch64/arm64-fma-combines.ll | 127 +----- .../AArch64/machine-combiner-fmul-dup.mir | 378 ------------------ 4 files changed, 3 insertions(+), 647 deletions(-) delete mode 100644 llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h index 67544779f34c..ac0cc70744d1 100644 --- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h +++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h @@ -153,18 +153,7 @@ enum class MachineCombinerPattern { FMLSv4f32_OP1, FMLSv4f32_OP2, FMLSv4i32_indexed_OP1, - FMLSv4i32_indexed_OP2, - - FMULv2i32_indexed_OP1, - FMULv2i32_indexed_OP2, - FMULv2i64_indexed_OP1, - FMULv2i64_indexed_OP2, - FMULv4i16_indexed_OP1, - FMULv4i16_indexed_OP2, - FMULv4i32_indexed_OP1, - FMULv4i32_indexed_OP2, - FMULv8i16_indexed_OP1, - FMULv8i16_indexed_OP2, + FMLSv4i32_indexed_OP2 }; } // end namespace llvm diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 94a6f4dd45b7..64adc973beeb 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -4525,55 +4525,6 @@ static bool getFMAPatterns(MachineInstr &Root, return Found; } -static bool getFMULPatterns(MachineInstr &Root, - SmallVectorImpl<MachineCombinerPattern> &Patterns) { - MachineBasicBlock &MBB = *Root.getParent(); - bool Found = false; - - auto Match = [&](unsigned Opcode, int Operand, - MachineCombinerPattern Pattern) -> bool { - MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - MachineOperand &MO = Root.getOperand(Operand); - MachineInstr *MI = nullptr; - if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) - MI = MRI.getUniqueVRegDef(MO.getReg()); - if (MI && MI->getOpcode() == Opcode) { - Patterns.push_back(Pattern); - return true; - } - return false; - }; - - typedef MachineCombinerPattern MCP; - - switch (Root.getOpcode()) { - default: - return false; - case AArch64::FMULv2f32: - Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1); - Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2); - break; - case AArch64::FMULv2f64: - Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1); - Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2); - break; - case AArch64::FMULv4f16: - Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1); - Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2); - break; - case AArch64::FMULv4f32: - Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1); - Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2); - break; - case AArch64::FMULv8f16: - Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1); - Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2); - break; - } - - return Found; -} - /// Return true when a code sequence can improve throughput. It /// should be called only for instructions in loops. 
/// \param Pattern - combiner pattern @@ -4637,16 +4588,6 @@ bool AArch64InstrInfo::isThroughputPattern( case MachineCombinerPattern::FMLSv2f64_OP2: case MachineCombinerPattern::FMLSv4i32_indexed_OP2: case MachineCombinerPattern::FMLSv4f32_OP2: - case MachineCombinerPattern::FMULv2i32_indexed_OP1: - case MachineCombinerPattern::FMULv2i32_indexed_OP2: - case MachineCombinerPattern::FMULv2i64_indexed_OP1: - case MachineCombinerPattern::FMULv2i64_indexed_OP2: - case MachineCombinerPattern::FMULv4i16_indexed_OP1: - case MachineCombinerPattern::FMULv4i16_indexed_OP2: - case MachineCombinerPattern::FMULv4i32_indexed_OP1: - case MachineCombinerPattern::FMULv4i32_indexed_OP2: - case MachineCombinerPattern::FMULv8i16_indexed_OP1: - case MachineCombinerPattern::FMULv8i16_indexed_OP2: case MachineCombinerPattern::MULADDv8i8_OP1: case MachineCombinerPattern::MULADDv8i8_OP2: case MachineCombinerPattern::MULADDv16i8_OP1: @@ -4703,8 +4644,6 @@ bool AArch64InstrInfo::getMachineCombinerPatterns( if (getMaddPatterns(Root, Patterns)) return true; // Floating point patterns - if (getFMULPatterns(Root, Patterns)) - return true; if (getFMAPatterns(Root, Patterns)) return true; @@ -4793,34 +4732,6 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, return MUL; } -static MachineInstr *genIndexedMultiply( - MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs, - unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC) { - assert(IdxDupOp == 1 || IdxDupOp == 2); - - MachineFunction &MF = *Root.getMF(); - const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - - MachineInstr *Dup = - MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg()); - Register DupSrcReg = Dup->getOperand(1).getReg(); - Register DupSrcLane = Dup->getOperand(2).getImm(); - - unsigned IdxMulOp = IdxDupOp == 1 ? 
2 : 1; - MachineOperand &MulOp = Root.getOperand(IdxMulOp); - - Register ResultReg = Root.getOperand(0).getReg(); - - MachineInstrBuilder MIB; - MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MulOpc), ResultReg) - .add(MulOp) - .addReg(DupSrcReg) - .addImm(DupSrcLane); - - InsInstrs.push_back(MIB); - return &Root; -} - /// genFusedMultiplyAcc - Helper to generate fused multiply accumulate /// instructions. /// @@ -5779,53 +5690,12 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; } - case MachineCombinerPattern::FMULv2i32_indexed_OP1: - case MachineCombinerPattern::FMULv2i32_indexed_OP2: { - unsigned IdxDupOp = - (Pattern == MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1 : 2; - genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed, - &AArch64::FPR64RegClass); - break; - } - case MachineCombinerPattern::FMULv2i64_indexed_OP1: - case MachineCombinerPattern::FMULv2i64_indexed_OP2: { - unsigned IdxDupOp = - (Pattern == MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1 : 2; - genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed, - &AArch64::FPR128RegClass); - break; - } - case MachineCombinerPattern::FMULv4i16_indexed_OP1: - case MachineCombinerPattern::FMULv4i16_indexed_OP2: { - unsigned IdxDupOp = - (Pattern == MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1 : 2; - genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed, - &AArch64::FPR64RegClass); - break; - } - case MachineCombinerPattern::FMULv4i32_indexed_OP1: - case MachineCombinerPattern::FMULv4i32_indexed_OP2: { - unsigned IdxDupOp = - (Pattern == MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 1 : 2; - genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed, - &AArch64::FPR128RegClass); - break; - } - case MachineCombinerPattern::FMULv8i16_indexed_OP1: - case MachineCombinerPattern::FMULv8i16_indexed_OP2: { - unsigned IdxDupOp = - (Pattern == MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 
1 : 2; - genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed, - &AArch64::FPR128RegClass); - break; - } } // end switch (Pattern) // Record MUL and ADD/SUB for deletion // FIXME: This assertion fails in CodeGen/AArch64/tailmerging_in_mbp.ll and // CodeGen/AArch64/urem-seteq-nonzero.ll. // assert(MUL && "MUL was never set"); - if (MUL) - DelInstrs.push_back(MUL); + DelInstrs.push_back(MUL); DelInstrs.push_back(&Root); } diff --git a/llvm/test/CodeGen/AArch64/arm64-fma-combines.ll b/llvm/test/CodeGen/AArch64/arm64-fma-combines.ll index 1768314a97a1..95ef0f90d231 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fma-combines.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fma-combines.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O=3 -mtriple=arm64-apple-ios -mcpu=cyclone -mattr=+fullfp16 -enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -O=3 -mtriple=arm64-apple-ios -mcpu=cyclone -enable-unsafe-fp-math | FileCheck %s define void @foo_2d(double* %src) { ; CHECK-LABEL: %entry ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} @@ -134,128 +134,3 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } - -define void @indexed_2s(<2 x float> %shuf, <2 x float> %add, - <2 x float>* %pmul, <2 x float>* %pret) { -; CHECK-LABEL: %entry -; CHECK: for.body -; CHECK: fmla.2s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0] -; -entry: - %shuffle = shufflevector <2 x float> %shuf, <2 x float> undef, <2 x i32> zeroinitializer - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %inext, %for.body ] - %pmul_i = getelementptr inbounds <2 x float>, <2 x float>* %pmul, i64 %i - %pret_i = getelementptr inbounds <2 x float>, <2 x float>* %pret, i64 %i - - %mul_i = load <2 x float>, <2 x float>* %pmul_i - - %mul = fmul fast <2 x float> %mul_i, %shuffle - %muladd = fadd fast <2 x float> %mul, %add - - store <2 x float> %muladd, <2 x float>* %pret_i, align 16 - %inext = add i64 %i, 1 - br label %for.body -} - -define void @indexed_2d(<2 x double> %shuf, <2 
x double> %add, - <2 x double>* %pmul, <2 x double>* %pret) { -; CHECK-LABEL: %entry -; CHECK: for.body -; CHECK: fmla.2d {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0] -; -entry: - %shuffle = shufflevector <2 x double> %shuf, <2 x double> undef, <2 x i32> zeroinitializer - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %inext, %for.body ] - %pmul_i = getelementptr inbounds <2 x double>, <2 x double>* %pmul, i64 %i - %pret_i = getelementptr inbounds <2 x double>, <2 x double>* %pret, i64 %i - - %mul_i = load <2 x double>, <2 x double>* %pmul_i - - %mul = fmul fast <2 x double> %mul_i, %shuffle - %muladd = fadd fast <2 x double> %mul, %add - - store <2 x double> %muladd, <2 x double>* %pret_i, align 16 - %inext = add i64 %i, 1 - br label %for.body -} - -define void @indexed_4s(<4 x float> %shuf, <4 x float> %add, - <4 x float>* %pmul, <4 x float>* %pret) { -; CHECK-LABEL: %entry -; CHECK: for.body -; CHECK: fmla.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0] -; -entry: - %shuffle = shufflevector <4 x float> %shuf, <4 x float> undef, <4 x i32> zeroinitializer - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %inext, %for.body ] - %pmul_i = getelementptr inbounds <4 x float>, <4 x float>* %pmul, i64 %i - %pret_i = getelementptr inbounds <4 x float>, <4 x float>* %pret, i64 %i - - %mul_i = load <4 x float>, <4 x float>* %pmul_i - - %mul = fmul fast <4 x float> %mul_i, %shuffle - %muladd = fadd fast <4 x float> %mul, %add - - store <4 x float> %muladd, <4 x float>* %pret_i, align 16 - %inext = add i64 %i, 1 - br label %for.body -} - -define void @indexed_4h(<4 x half> %shuf, <4 x half> %add, - <4 x half>* %pmul, <4 x half>* %pret) { -; CHECK-LABEL: %entry -; CHECK: for.body -; CHECK: fmla.4h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0] -; -entry: - %shuffle = shufflevector <4 x half> %shuf, <4 x half> undef, <4 x i32> zeroinitializer - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %inext, %for.body ] - %pmul_i = getelementptr 
inbounds <4 x half>, <4 x half>* %pmul, i64 %i - %pret_i = getelementptr inbounds <4 x half>, <4 x half>* %pret, i64 %i - - %mul_i = load <4 x half>, <4 x half>* %pmul_i - - %mul = fmul fast <4 x half> %mul_i, %shuffle - %muladd = fadd fast <4 x half> %mul, %add - - store <4 x half> %muladd, <4 x half>* %pret_i, align 16 - %inext = add i64 %i, 1 - br label %for.body -} - -define void @indexed_8h(<8 x half> %shuf, <8 x half> %add, - <8 x half>* %pmul, <8 x half>* %pret) { -; CHECK-LABEL: %entry -; CHECK: for.body -; CHECK: fmla.8h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0] -; -entry: - %shuffle = shufflevector <8 x half> %shuf, <8 x half> undef, <8 x i32> zeroinitializer - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %inext, %for.body ] - %pmul_i = getelementptr inbounds <8 x half>, <8 x half>* %pmul, i64 %i - %pret_i = getelementptr inbounds <8 x half>, <8 x half>* %pret, i64 %i - - %mul_i = load <8 x half>, <8 x half>* %pmul_i - - %mul = fmul fast <8 x half> %mul_i, %shuffle - %muladd = fadd fast <8 x half> %mul, %add - - store <8 x half> %muladd, <8 x half>* %pret_i, align 16 - %inext = add i64 %i, 1 - br label %for.body -} diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir b/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir deleted file mode 100644 index 29c8f38f83a9..000000000000 --- a/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir +++ /dev/null @@ -1,378 +0,0 @@ -# RUN: llc -run-pass=machine-combiner -o - -simplify-mir -mtriple=aarch64-unknown-linux-gnu -mattr=+fullfp16 %s | FileCheck %s ---- | - ; ModuleID = 'lit.ll' - source_filename = "lit.ll" - target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" - target triple = "aarch64-unknown-linux-gnu" - - define void @indexed_2s(<2 x float> %shuf, <2 x float> %mu, <2 x float> %ad, <2 x float>* %ret) #0 { - entry: - %shuffle = shufflevector <2 x float> %shuf, <2 x float> undef, <2 x i32> zeroinitializer - br label %for.cond - - for.cond: ; preds = 
%for.cond, %entry - %mul = fmul <2 x float> %mu, %shuffle - %add = fadd <2 x float> %mul, %ad - store <2 x float> %add, <2 x float>* %ret, align 16 - br label %for.cond - } - - define void @indexed_2s_rev(<2 x float> %shuf, <2 x float> %mu, <2 x float> %ad, <2 x float>* %ret) #0 { - entry: - %shuffle = shufflevector <2 x float> %shuf, <2 x float> undef, <2 x i32> zeroinitializer - br label %for.cond - - for.cond: ; preds = %for.cond, %entry - %mul = fmul <2 x float> %shuffle, %mu - %add = fadd <2 x float> %mul, %ad - store <2 x float> %add, <2 x float>* %ret, align 16 - br label %for.cond - } - - define void @indexed_2d(<2 x double> %shuf, <2 x double> %mu, <2 x double> %ad, <2 x double>* %ret) #0 { - entry: - %shuffle = shufflevector <2 x double> %shuf, <2 x double> undef, <2 x i32> zeroinitializer - br label %for.cond - - for.cond: ; preds = %for.cond, %entry - %mul = fmul <2 x double> %mu, %shuffle - %add = fadd <2 x double> %mul, %ad - store <2 x double> %add, <2 x double>* %ret, align 16 - br label %for.cond - } - - define void @indexed_4s(<4 x float> %shuf, <4 x float> %mu, <4 x float> %ad, <4 x float>* %ret) #0 { - entry: - %shuffle = shufflevector <4 x float> %shuf, <4 x float> undef, <4 x i32> zeroinitializer - br label %for.cond - - for.cond: ; preds = %for.cond, %entry - %mul = fmul <4 x float> %mu, %shuffle - %add = fadd <4 x float> %mul, %ad - store <4 x float> %add, <4 x float>* %ret, align 16 - br label %for.cond - } - - define void @indexed_4h(<4 x half> %shuf, <4 x half> %mu, <4 x half> %ad, <4 x half>* %ret) #0 { - entry: - %shuffle = shufflevector <4 x half> %shuf, <4 x half> undef, <4 x i32> zeroinitializer - br label %for.cond - - for.cond: - %mul = fmul <4 x half> %mu, %shuffle - %add = fadd <4 x half> %mul, %ad - store <4 x half> %add, <4 x half>* %ret, align 16 - br label %for.cond - } - - define void @indexed_8h(<8 x half> %shuf, <8 x half> %mu, <8 x half> %ad, <8 x half>* %ret) #0 { - entry: - %shuffle = shufflevector <8 x half> %shuf, <8 
x half> undef, <8 x i32> zeroinitializer - br label %for.cond - - for.cond: - %mul = fmul <8 x half> %mu, %shuffle - %add = fadd <8 x half> %mul, %ad - store <8 x half> %add, <8 x half>* %ret, align 16 - br label %for.cond - } - - attributes #0 = { "target-cpu"="cortex-a57" } - -... -# CHECK-LABEL: name: indexed_2s -# CHECK: [[OP1COPY:%.*]]:fpr64 = COPY $d1 -# CHECK: [[OP2COPY:%.*]]:fpr64 = COPY $d0 -# CHECK: [[UNDEF:%.*]]:fpr128 = IMPLICIT_DEF -# CHECK: [[OP2:%.*]]:fpr128 = INSERT_SUBREG [[UNDEF]], [[OP2COPY]], %subreg.dsub -# CHECK: [[OP1:%.*]]:fpr64 = COPY [[OP1COPY]] -# CHECK-NOT: FMULv2f32 -# CHECK: :fpr64 = FMULv2i32_indexed [[OP1]], [[OP2]], 0 ---- -name: indexed_2s -alignment: 16 -tracksRegLiveness: true -registers: - - { id: 0, class: fpr64 } - - { id: 1, class: fpr64 } - - { id: 2, class: fpr64 } - - { id: 3, class: fpr64 } - - { id: 4, class: gpr64common } - - { id: 5, class: fpr64 } - - { id: 6, class: fpr64 } - - { id: 7, class: fpr128 } - - { id: 8, class: fpr128 } - - { id: 9, class: fpr64 } - - { id: 10, class: fpr64 } -liveins: - - { reg: '$d0', virtual-reg: '%1' } - - { reg: '$d1', virtual-reg: '%2' } - - { reg: '$d2', virtual-reg: '%3' } - - { reg: '$x0', virtual-reg: '%4' } -frameInfo: - maxAlignment: 1 - maxCallFrameSize: 0 -machineFunctionInfo: {} -body: | - bb.0.entry: - liveins: $d0, $d1, $d2, $x0 - - %4:gpr64common = COPY $x0 - %3:fpr64 = COPY $d2 - %2:fpr64 = COPY $d1 - %1:fpr64 = COPY $d0 - %8:fpr128 = IMPLICIT_DEF - %7:fpr128 = INSERT_SUBREG %8, %1, %subreg.dsub - %6:fpr64 = COPY %3 - %5:fpr64 = COPY %2 - %0:fpr64 = DUPv2i32lane killed %7, 0 - - bb.1.for.cond: - %9:fpr64 = FMULv2f32 %5, %0 - %10:fpr64 = FADDv2f32 killed %9, %6 - STRDui killed %10, %4, 0 :: (store 8 into %ir.ret, align 16) - B %bb.1 - -... 
-# CHECK-LABEL: name: indexed_2s_rev -# CHECK: [[OP2COPY:%.*]]:fpr64 = COPY $d1 -# CHECK: [[OP1COPY:%.*]]:fpr64 = COPY $d0 -# CHECK: [[UNDEF:%.*]]:fpr128 = IMPLICIT_DEF -# CHECK: [[OP1:%.*]]:fpr128 = INSERT_SUBREG [[UNDEF]], [[OP1COPY]], %subreg.dsub -# CHECK: [[OP2:%.*]]:fpr64 = COPY [[OP2COPY]] -# CHECK-NOT: FMULv2f32 -# CHECK: :fpr64 = FMULv2i32_indexed [[OP2]], [[OP1]], 0 ---- -name: indexed_2s_rev -alignment: 16 -tracksRegLiveness: true -registers: - - { id: 0, class: fpr64 } - - { id: 1, class: fpr64 } - - { id: 2, class: fpr64 } - - { id: 3, class: fpr64 } - - { id: 4, class: gpr64common } - - { id: 5, class: fpr64 } - - { id: 6, class: fpr64 } - - { id: 7, class: fpr128 } - - { id: 8, class: fpr128 } - - { id: 9, class: fpr64 } - - { id: 10, class: fpr64 } -liveins: - - { reg: '$d0', virtual-reg: '%1' } - - { reg: '$d1', virtual-reg: '%2' } - - { reg: '$d2', virtual-reg: '%3' } - - { reg: '$x0', virtual-reg: '%4' } -frameInfo: - maxAlignment: 1 - maxCallFrameSize: 0 -machineFunctionInfo: {} -body: | - bb.0.entry: - liveins: $d0, $d1, $d2, $x0 - - %4:gpr64common = COPY $x0 - %3:fpr64 = COPY $d2 - %2:fpr64 = COPY $d1 - %1:fpr64 = COPY $d0 - %8:fpr128 = IMPLICIT_DEF - %7:fpr128 = INSERT_SUBREG %8, %1, %subreg.dsub - %6:fpr64 = COPY %3 - %5:fpr64 = COPY %2 - %0:fpr64 = DUPv2i32lane killed %7, 0 - - bb.1.for.cond: - %9:fpr64 = FMULv2f32 %0, %5 - %10:fpr64 = FADDv2f32 killed %9, %6 - STRDui killed %10, %4, 0 :: (store 8 into %ir.ret, align 16) - B %bb.1 - -... 
-# CHECK-LABEL: name: indexed_2d -# CHECK: [[OP1COPY:%.*]]:fpr128 = COPY $q1 -# CHECK: [[OP2:%.*]]:fpr128 = COPY $q0 -# CHECK: [[OP1:%.*]]:fpr128 = COPY [[OP1COPY]] -# CHECK-NOT: FMULv2f64 -# CHECK: :fpr128 = FMULv2i64_indexed [[OP1]], [[OP2]], 0 ---- -name: indexed_2d -alignment: 16 -tracksRegLiveness: true -registers: - - { id: 0, class: fpr128 } - - { id: 1, class: fpr128 } - - { id: 2, class: fpr128 } - - { id: 3, class: fpr128 } - - { id: 4, class: gpr64common } - - { id: 5, class: fpr128 } - - { id: 6, class: fpr128 } - - { id: 7, class: fpr128 } - - { id: 8, class: fpr128 } -liveins: - - { reg: '$q0', virtual-reg: '%1' } - - { reg: '$q1', virtual-reg: '%2' } - - { reg: '$q2', virtual-reg: '%3' } - - { reg: '$x0', virtual-reg: '%4' } -frameInfo: - maxAlignment: 1 - maxCallFrameSize: 0 -machineFunctionInfo: {} -body: | - bb.0.entry: - liveins: $q0, $q1, $q2, $x0 - - %4:gpr64common = COPY $x0 - %3:fpr128 = COPY $q2 - %2:fpr128 = COPY $q1 - %1:fpr128 = COPY $q0 - %6:fpr128 = COPY %3 - %5:fpr128 = COPY %2 - %0:fpr128 = DUPv2i64lane %1, 0 - - bb.1.for.cond: - %7:fpr128 = FMULv2f64 %5, %0 - %8:fpr128 = FADDv2f64 killed %7, %6 - STRQui killed %8, %4, 0 :: (store 16 into %ir.ret) - B %bb.1 - -... 
-# CHECK-LABEL: name: indexed_4s -# CHECK: [[OP1COPY:%.*]]:fpr128 = COPY $q1 -# CHECK: [[OP2:%.*]]:fpr128 = COPY $q0 -# CHECK: [[OP1:%.*]]:fpr128 = COPY [[OP1COPY]] -# CHECK-NOT: FMULv4f32 -# CHECK: :fpr128 = FMULv4i32_indexed [[OP1]], [[OP2]], 0 ---- -name: indexed_4s -alignment: 16 -tracksRegLiveness: true -registers: - - { id: 0, class: fpr128 } - - { id: 1, class: fpr128 } - - { id: 2, class: fpr128 } - - { id: 3, class: fpr128 } - - { id: 4, class: gpr64common } - - { id: 5, class: fpr128 } - - { id: 6, class: fpr128 } - - { id: 7, class: fpr128 } - - { id: 8, class: fpr128 } -liveins: - - { reg: '$q0', virtual-reg: '%1' } - - { reg: '$q1', virtual-reg: '%2' } - - { reg: '$q2', virtual-reg: '%3' } - - { reg: '$x0', virtual-reg: '%4' } -frameInfo: - maxAlignment: 1 - maxCallFrameSize: 0 -machineFunctionInfo: {} -body: | - bb.0.entry: - liveins: $q0, $q1, $q2, $x0 - - %4:gpr64common = COPY $x0 - %3:fpr128 = COPY $q2 - %2:fpr128 = COPY $q1 - %1:fpr128 = COPY $q0 - %6:fpr128 = COPY %3 - %5:fpr128 = COPY %2 - %0:fpr128 = DUPv4i32lane %1, 0 - - bb.1.for.cond: - %7:fpr128 = FMULv4f32 %5, %0 - %8:fpr128 = FADDv4f32 killed %7, %6 - STRQui killed %8, %4, 0 :: (store 16 into %ir.ret) - B %bb.1 - -... 
-# CHECK-LABEL: name: indexed_4h -# CHECK: [[OP1:%.*]]:fpr64 = COPY $d1 -# CHECK: [[OP2COPY:%.*]]:fpr64 = COPY $d0 -# CHECK: [[UNDEF:%.*]]:fpr128 = IMPLICIT_DEF -# CHECK: [[OP2:%.*]]:fpr128 = INSERT_SUBREG [[UNDEF]], [[OP2COPY]], %subreg.dsub -# CHECK-NOT: FMULv4f16 -# CHECK: :fpr64 = FMULv4i16_indexed [[OP1]], [[OP2]], 0 ---- -name: indexed_4h -alignment: 16 -tracksRegLiveness: true -registers: - - { id: 0, class: fpr64 } - - { id: 1, class: fpr64 } - - { id: 2, class: fpr64 } - - { id: 3, class: fpr64 } - - { id: 4, class: gpr64common } - - { id: 5, class: fpr128 } - - { id: 6, class: fpr128 } - - { id: 7, class: fpr64 } - - { id: 8, class: fpr64 } -liveins: - - { reg: '$d0', virtual-reg: '%1' } - - { reg: '$d1', virtual-reg: '%2' } - - { reg: '$d2', virtual-reg: '%3' } - - { reg: '$x0', virtual-reg: '%4' } -frameInfo: - maxAlignment: 1 - maxCallFrameSize: 0 -machineFunctionInfo: {} -body: | - bb.0.entry: - liveins: $d0, $d1, $d2, $x0 - - %4:gpr64common = COPY $x0 - %3:fpr64 = COPY $d2 - %2:fpr64 = COPY $d1 - %1:fpr64 = COPY $d0 - %6:fpr128 = IMPLICIT_DEF - %5:fpr128 = INSERT_SUBREG %6, %1, %subreg.dsub - %0:fpr64 = DUPv4i16lane killed %5, 0 - - bb.1.for.cond: - %7:fpr64 = FMULv4f16 %2, %0 - %8:fpr64 = FADDv4f16 killed %7, %3 - STRDui killed %8, %4, 0 :: (store 8 into %ir.ret, align 16) - B %bb.1 - -... 
-# CHECK-LABEL: name: indexed_8h -# CHECK: [[OP1:%.*]]:fpr128 = COPY $q1 -# CHECK: [[OP2:%.*]]:fpr128 = COPY $q0 -# CHECK-NOT: FMULv8f16 -# CHECK: :fpr128 = FMULv8i16_indexed [[OP1]], [[OP2]], 0 ---- -name: indexed_8h -alignment: 16 -tracksRegLiveness: true -registers: - - { id: 0, class: fpr128 } - - { id: 1, class: fpr128 } - - { id: 2, class: fpr128 } - - { id: 3, class: fpr128 } - - { id: 4, class: gpr64common } - - { id: 5, class: fpr128 } - - { id: 6, class: fpr128 } -liveins: - - { reg: '$q0', virtual-reg: '%1' } - - { reg: '$q1', virtual-reg: '%2' } - - { reg: '$q2', virtual-reg: '%3' } - - { reg: '$x0', virtual-reg: '%4' } -frameInfo: - maxAlignment: 1 - maxCallFrameSize: 0 -machineFunctionInfo: {} -body: | - bb.0.entry: - liveins: $q0, $q1, $q2, $x0 - - %4:gpr64common = COPY $x0 - %3:fpr128 = COPY $q2 - %2:fpr128 = COPY $q1 - %1:fpr128 = COPY $q0 - %0:fpr128 = DUPv8i16lane %1, 0 - - bb.1.for.cond: - %5:fpr128 = FMULv8f16 %2, %0 - %6:fpr128 = FADDv8f16 killed %5, %3 - STRQui killed %6, %4, 0 :: (store 16 into %ir.ret) - B %bb.1 - -...