Revert "[AArch64] Add Machine InstCombiner patterns for FMUL indexed variant"
This reverts commit cca9b5985c.
Buildbot reported an error for CodeGen/AArch64/machine-combiner-fmul-dup.mir:
*** Bad machine code: Virtual register killed in block, but needed live out. ***
- function: indexed_2s
- basic block: %bb.0 entry (0x640fee8)
Virtual register %7 is used after the block.
*** Bad machine code: Virtual register defs don't dominate all uses. ***
- function: indexed_2s
- v. register: %7
LLVM ERROR: Found 2 machine code errors.
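
For context, the reverted combine rewrote a vector FMUL fed by a lane-splatting DUP into the lane-indexed FMUL form. In the indexed_2s MIR test below, the DUP and its fpr128 source %7 (built by INSERT_SUBREG and marked killed) sit in %bb.0, while the FMUL being combined sits in the %bb.1 loop body, so the rewritten instruction ends up reading %7 across the block boundary, which is what the verifier output above flags. A minimal sketch of the failing shape, trimmed from the indexed_2s test in this diff (the combined line is inferred from the test's CHECK expectations and is illustrative, not verbatim compiler output):

  bb.0.entry:
    %8:fpr128 = IMPLICIT_DEF
    %7:fpr128 = INSERT_SUBREG %8, %1, %subreg.dsub
    %0:fpr64 = DUPv2i32lane killed %7, 0    ; %7 is killed here, in %bb.0

  bb.1.for.cond:
    ; before the combine:
    %9:fpr64 = FMULv2f32 %5, %0
    ; after the combine, per the CHECK lines of the deleted test:
    %9:fpr64 = FMULv2i32_indexed %5, %7, 0  ; reads %7 in %bb.1 even though its
                                            ; last use in %bb.0 carries a kill flag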
parent cca9b5985c
commit f037b07b5c
@@ -153,18 +153,7 @@ enum class MachineCombinerPattern {
  FMLSv4f32_OP1,
  FMLSv4f32_OP2,
  FMLSv4i32_indexed_OP1,
  FMLSv4i32_indexed_OP2,

  FMULv2i32_indexed_OP1,
  FMULv2i32_indexed_OP2,
  FMULv2i64_indexed_OP1,
  FMULv2i64_indexed_OP2,
  FMULv4i16_indexed_OP1,
  FMULv4i16_indexed_OP2,
  FMULv4i32_indexed_OP1,
  FMULv4i32_indexed_OP2,
  FMULv8i16_indexed_OP1,
  FMULv8i16_indexed_OP2,
  FMLSv4i32_indexed_OP2
};

} // end namespace llvm
@@ -4525,55 +4525,6 @@ static bool getFMAPatterns(MachineInstr &Root,
  return Found;
}

static bool getFMULPatterns(MachineInstr &Root,
                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  auto Match = [&](unsigned Opcode, int Operand,
                   MachineCombinerPattern Pattern) -> bool {
    MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    MachineOperand &MO = Root.getOperand(Operand);
    MachineInstr *MI = nullptr;
    if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
      MI = MRI.getUniqueVRegDef(MO.getReg());
    if (MI && MI->getOpcode() == Opcode) {
      Patterns.push_back(Pattern);
      return true;
    }
    return false;
  };

  typedef MachineCombinerPattern MCP;

  switch (Root.getOpcode()) {
  default:
    return false;
  case AArch64::FMULv2f32:
    Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
    Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
    break;
  case AArch64::FMULv2f64:
    Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
    Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
    break;
  case AArch64::FMULv4f16:
    Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
    Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
    break;
  case AArch64::FMULv4f32:
    Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
    Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
    break;
  case AArch64::FMULv8f16:
    Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
    Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
    break;
  }

  return Found;
}

/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
@@ -4637,16 +4588,6 @@ bool AArch64InstrInfo::isThroughputPattern(
  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv4f32_OP2:
  case MachineCombinerPattern::FMULv2i32_indexed_OP1:
  case MachineCombinerPattern::FMULv2i32_indexed_OP2:
  case MachineCombinerPattern::FMULv2i64_indexed_OP1:
  case MachineCombinerPattern::FMULv2i64_indexed_OP2:
  case MachineCombinerPattern::FMULv4i16_indexed_OP1:
  case MachineCombinerPattern::FMULv4i16_indexed_OP2:
  case MachineCombinerPattern::FMULv4i32_indexed_OP1:
  case MachineCombinerPattern::FMULv4i32_indexed_OP2:
  case MachineCombinerPattern::FMULv8i16_indexed_OP1:
  case MachineCombinerPattern::FMULv8i16_indexed_OP2:
  case MachineCombinerPattern::MULADDv8i8_OP1:
  case MachineCombinerPattern::MULADDv8i8_OP2:
  case MachineCombinerPattern::MULADDv16i8_OP1:
@@ -4703,8 +4644,6 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
  if (getMaddPatterns(Root, Patterns))
    return true;
  // Floating point patterns
  if (getFMULPatterns(Root, Patterns))
    return true;
  if (getFMAPatterns(Root, Patterns))
    return true;

@@ -4793,34 +4732,6 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
  return MUL;
}

static MachineInstr *genIndexedMultiply(
    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
    unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC) {
  assert(IdxDupOp == 1 || IdxDupOp == 2);

  MachineFunction &MF = *Root.getMF();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  MachineInstr *Dup =
      MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
  Register DupSrcReg = Dup->getOperand(1).getReg();
  Register DupSrcLane = Dup->getOperand(2).getImm();

  unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
  MachineOperand &MulOp = Root.getOperand(IdxMulOp);

  Register ResultReg = Root.getOperand(0).getReg();

  MachineInstrBuilder MIB;
  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MulOpc), ResultReg)
            .add(MulOp)
            .addReg(DupSrcReg)
            .addImm(DupSrcLane);

  InsInstrs.push_back(MIB);
  return &Root;
}

/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
/// instructions.
///
@@ -5779,52 +5690,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
    }
    break;
  }
  case MachineCombinerPattern::FMULv2i32_indexed_OP1:
  case MachineCombinerPattern::FMULv2i32_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1 : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
                       &AArch64::FPR64RegClass);
    break;
  }
  case MachineCombinerPattern::FMULv2i64_indexed_OP1:
  case MachineCombinerPattern::FMULv2i64_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1 : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
                       &AArch64::FPR128RegClass);
    break;
  }
  case MachineCombinerPattern::FMULv4i16_indexed_OP1:
  case MachineCombinerPattern::FMULv4i16_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1 : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
                       &AArch64::FPR64RegClass);
    break;
  }
  case MachineCombinerPattern::FMULv4i32_indexed_OP1:
  case MachineCombinerPattern::FMULv4i32_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 1 : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
                       &AArch64::FPR128RegClass);
    break;
  }
  case MachineCombinerPattern::FMULv8i16_indexed_OP1:
  case MachineCombinerPattern::FMULv8i16_indexed_OP2: {
    unsigned IdxDupOp =
        (Pattern == MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1 : 2;
    genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
                       &AArch64::FPR128RegClass);
    break;
  }
  } // end switch (Pattern)
  // Record MUL and ADD/SUB for deletion
  // FIXME: This assertion fails in CodeGen/AArch64/tailmerging_in_mbp.ll and
  // CodeGen/AArch64/urem-seteq-nonzero.ll.
  // assert(MUL && "MUL was never set");
  if (MUL)
    DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);
}
@@ -1,4 +1,4 @@
; RUN: llc < %s -O=3 -mtriple=arm64-apple-ios -mcpu=cyclone -mattr=+fullfp16 -enable-unsafe-fp-math | FileCheck %s
; RUN: llc < %s -O=3 -mtriple=arm64-apple-ios -mcpu=cyclone -enable-unsafe-fp-math | FileCheck %s
define void @foo_2d(double* %src) {
; CHECK-LABEL: %entry
; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
@@ -134,128 +134,3 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
  ret void
}

define void @indexed_2s(<2 x float> %shuf, <2 x float> %add,
                        <2 x float>* %pmul, <2 x float>* %pret) {
; CHECK-LABEL: %entry
; CHECK: for.body
; CHECK: fmla.2s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
;
entry:
  %shuffle = shufflevector <2 x float> %shuf, <2 x float> undef, <2 x i32> zeroinitializer
  br label %for.body

for.body:
  %i = phi i64 [ 0, %entry ], [ %inext, %for.body ]
  %pmul_i = getelementptr inbounds <2 x float>, <2 x float>* %pmul, i64 %i
  %pret_i = getelementptr inbounds <2 x float>, <2 x float>* %pret, i64 %i

  %mul_i = load <2 x float>, <2 x float>* %pmul_i

  %mul = fmul fast <2 x float> %mul_i, %shuffle
  %muladd = fadd fast <2 x float> %mul, %add

  store <2 x float> %muladd, <2 x float>* %pret_i, align 16
  %inext = add i64 %i, 1
  br label %for.body
}

define void @indexed_2d(<2 x double> %shuf, <2 x double> %add,
                        <2 x double>* %pmul, <2 x double>* %pret) {
; CHECK-LABEL: %entry
; CHECK: for.body
; CHECK: fmla.2d {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
;
entry:
  %shuffle = shufflevector <2 x double> %shuf, <2 x double> undef, <2 x i32> zeroinitializer
  br label %for.body

for.body:
  %i = phi i64 [ 0, %entry ], [ %inext, %for.body ]
  %pmul_i = getelementptr inbounds <2 x double>, <2 x double>* %pmul, i64 %i
  %pret_i = getelementptr inbounds <2 x double>, <2 x double>* %pret, i64 %i

  %mul_i = load <2 x double>, <2 x double>* %pmul_i

  %mul = fmul fast <2 x double> %mul_i, %shuffle
  %muladd = fadd fast <2 x double> %mul, %add

  store <2 x double> %muladd, <2 x double>* %pret_i, align 16
  %inext = add i64 %i, 1
  br label %for.body
}

define void @indexed_4s(<4 x float> %shuf, <4 x float> %add,
                        <4 x float>* %pmul, <4 x float>* %pret) {
; CHECK-LABEL: %entry
; CHECK: for.body
; CHECK: fmla.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
;
entry:
  %shuffle = shufflevector <4 x float> %shuf, <4 x float> undef, <4 x i32> zeroinitializer
  br label %for.body

for.body:
  %i = phi i64 [ 0, %entry ], [ %inext, %for.body ]
  %pmul_i = getelementptr inbounds <4 x float>, <4 x float>* %pmul, i64 %i
  %pret_i = getelementptr inbounds <4 x float>, <4 x float>* %pret, i64 %i

  %mul_i = load <4 x float>, <4 x float>* %pmul_i

  %mul = fmul fast <4 x float> %mul_i, %shuffle
  %muladd = fadd fast <4 x float> %mul, %add

  store <4 x float> %muladd, <4 x float>* %pret_i, align 16
  %inext = add i64 %i, 1
  br label %for.body
}

define void @indexed_4h(<4 x half> %shuf, <4 x half> %add,
                        <4 x half>* %pmul, <4 x half>* %pret) {
; CHECK-LABEL: %entry
; CHECK: for.body
; CHECK: fmla.4h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
;
entry:
  %shuffle = shufflevector <4 x half> %shuf, <4 x half> undef, <4 x i32> zeroinitializer
  br label %for.body

for.body:
  %i = phi i64 [ 0, %entry ], [ %inext, %for.body ]
  %pmul_i = getelementptr inbounds <4 x half>, <4 x half>* %pmul, i64 %i
  %pret_i = getelementptr inbounds <4 x half>, <4 x half>* %pret, i64 %i

  %mul_i = load <4 x half>, <4 x half>* %pmul_i

  %mul = fmul fast <4 x half> %mul_i, %shuffle
  %muladd = fadd fast <4 x half> %mul, %add

  store <4 x half> %muladd, <4 x half>* %pret_i, align 16
  %inext = add i64 %i, 1
  br label %for.body
}

define void @indexed_8h(<8 x half> %shuf, <8 x half> %add,
                        <8 x half>* %pmul, <8 x half>* %pret) {
; CHECK-LABEL: %entry
; CHECK: for.body
; CHECK: fmla.8h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
;
entry:
  %shuffle = shufflevector <8 x half> %shuf, <8 x half> undef, <8 x i32> zeroinitializer
  br label %for.body

for.body:
  %i = phi i64 [ 0, %entry ], [ %inext, %for.body ]
  %pmul_i = getelementptr inbounds <8 x half>, <8 x half>* %pmul, i64 %i
  %pret_i = getelementptr inbounds <8 x half>, <8 x half>* %pret, i64 %i

  %mul_i = load <8 x half>, <8 x half>* %pmul_i

  %mul = fmul fast <8 x half> %mul_i, %shuffle
  %muladd = fadd fast <8 x half> %mul, %add

  store <8 x half> %muladd, <8 x half>* %pret_i, align 16
  %inext = add i64 %i, 1
  br label %for.body
}
@@ -1,378 +0,0 @@
# RUN: llc -run-pass=machine-combiner -o - -simplify-mir -mtriple=aarch64-unknown-linux-gnu -mattr=+fullfp16 %s | FileCheck %s
--- |
  ; ModuleID = 'lit.ll'
  source_filename = "lit.ll"
  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
  target triple = "aarch64-unknown-linux-gnu"

  define void @indexed_2s(<2 x float> %shuf, <2 x float> %mu, <2 x float> %ad, <2 x float>* %ret) #0 {
  entry:
    %shuffle = shufflevector <2 x float> %shuf, <2 x float> undef, <2 x i32> zeroinitializer
    br label %for.cond

  for.cond: ; preds = %for.cond, %entry
    %mul = fmul <2 x float> %mu, %shuffle
    %add = fadd <2 x float> %mul, %ad
    store <2 x float> %add, <2 x float>* %ret, align 16
    br label %for.cond
  }

  define void @indexed_2s_rev(<2 x float> %shuf, <2 x float> %mu, <2 x float> %ad, <2 x float>* %ret) #0 {
  entry:
    %shuffle = shufflevector <2 x float> %shuf, <2 x float> undef, <2 x i32> zeroinitializer
    br label %for.cond

  for.cond: ; preds = %for.cond, %entry
    %mul = fmul <2 x float> %shuffle, %mu
    %add = fadd <2 x float> %mul, %ad
    store <2 x float> %add, <2 x float>* %ret, align 16
    br label %for.cond
  }

  define void @indexed_2d(<2 x double> %shuf, <2 x double> %mu, <2 x double> %ad, <2 x double>* %ret) #0 {
  entry:
    %shuffle = shufflevector <2 x double> %shuf, <2 x double> undef, <2 x i32> zeroinitializer
    br label %for.cond

  for.cond: ; preds = %for.cond, %entry
    %mul = fmul <2 x double> %mu, %shuffle
    %add = fadd <2 x double> %mul, %ad
    store <2 x double> %add, <2 x double>* %ret, align 16
    br label %for.cond
  }

  define void @indexed_4s(<4 x float> %shuf, <4 x float> %mu, <4 x float> %ad, <4 x float>* %ret) #0 {
  entry:
    %shuffle = shufflevector <4 x float> %shuf, <4 x float> undef, <4 x i32> zeroinitializer
    br label %for.cond

  for.cond: ; preds = %for.cond, %entry
    %mul = fmul <4 x float> %mu, %shuffle
    %add = fadd <4 x float> %mul, %ad
    store <4 x float> %add, <4 x float>* %ret, align 16
    br label %for.cond
  }

  define void @indexed_4h(<4 x half> %shuf, <4 x half> %mu, <4 x half> %ad, <4 x half>* %ret) #0 {
  entry:
    %shuffle = shufflevector <4 x half> %shuf, <4 x half> undef, <4 x i32> zeroinitializer
    br label %for.cond

  for.cond:
    %mul = fmul <4 x half> %mu, %shuffle
    %add = fadd <4 x half> %mul, %ad
    store <4 x half> %add, <4 x half>* %ret, align 16
    br label %for.cond
  }

  define void @indexed_8h(<8 x half> %shuf, <8 x half> %mu, <8 x half> %ad, <8 x half>* %ret) #0 {
  entry:
    %shuffle = shufflevector <8 x half> %shuf, <8 x half> undef, <8 x i32> zeroinitializer
    br label %for.cond

  for.cond:
    %mul = fmul <8 x half> %mu, %shuffle
    %add = fadd <8 x half> %mul, %ad
    store <8 x half> %add, <8 x half>* %ret, align 16
    br label %for.cond
  }

  attributes #0 = { "target-cpu"="cortex-a57" }

...
# CHECK-LABEL: name: indexed_2s
# CHECK: [[OP1COPY:%.*]]:fpr64 = COPY $d1
# CHECK: [[OP2COPY:%.*]]:fpr64 = COPY $d0
# CHECK: [[UNDEF:%.*]]:fpr128 = IMPLICIT_DEF
# CHECK: [[OP2:%.*]]:fpr128 = INSERT_SUBREG [[UNDEF]], [[OP2COPY]], %subreg.dsub
# CHECK: [[OP1:%.*]]:fpr64 = COPY [[OP1COPY]]
# CHECK-NOT: FMULv2f32
# CHECK: :fpr64 = FMULv2i32_indexed [[OP1]], [[OP2]], 0
---
name: indexed_2s
alignment: 16
tracksRegLiveness: true
registers:
  - { id: 0, class: fpr64 }
  - { id: 1, class: fpr64 }
  - { id: 2, class: fpr64 }
  - { id: 3, class: fpr64 }
  - { id: 4, class: gpr64common }
  - { id: 5, class: fpr64 }
  - { id: 6, class: fpr64 }
  - { id: 7, class: fpr128 }
  - { id: 8, class: fpr128 }
  - { id: 9, class: fpr64 }
  - { id: 10, class: fpr64 }
liveins:
  - { reg: '$d0', virtual-reg: '%1' }
  - { reg: '$d1', virtual-reg: '%2' }
  - { reg: '$d2', virtual-reg: '%3' }
  - { reg: '$x0', virtual-reg: '%4' }
frameInfo:
  maxAlignment: 1
  maxCallFrameSize: 0
machineFunctionInfo: {}
body: |
  bb.0.entry:
    liveins: $d0, $d1, $d2, $x0

    %4:gpr64common = COPY $x0
    %3:fpr64 = COPY $d2
    %2:fpr64 = COPY $d1
    %1:fpr64 = COPY $d0
    %8:fpr128 = IMPLICIT_DEF
    %7:fpr128 = INSERT_SUBREG %8, %1, %subreg.dsub
    %6:fpr64 = COPY %3
    %5:fpr64 = COPY %2
    %0:fpr64 = DUPv2i32lane killed %7, 0

  bb.1.for.cond:
    %9:fpr64 = FMULv2f32 %5, %0
    %10:fpr64 = FADDv2f32 killed %9, %6
    STRDui killed %10, %4, 0 :: (store 8 into %ir.ret, align 16)
    B %bb.1

...
# CHECK-LABEL: name: indexed_2s_rev
# CHECK: [[OP2COPY:%.*]]:fpr64 = COPY $d1
# CHECK: [[OP1COPY:%.*]]:fpr64 = COPY $d0
# CHECK: [[UNDEF:%.*]]:fpr128 = IMPLICIT_DEF
# CHECK: [[OP1:%.*]]:fpr128 = INSERT_SUBREG [[UNDEF]], [[OP1COPY]], %subreg.dsub
# CHECK: [[OP2:%.*]]:fpr64 = COPY [[OP2COPY]]
# CHECK-NOT: FMULv2f32
# CHECK: :fpr64 = FMULv2i32_indexed [[OP2]], [[OP1]], 0
---
name: indexed_2s_rev
alignment: 16
tracksRegLiveness: true
registers:
  - { id: 0, class: fpr64 }
  - { id: 1, class: fpr64 }
  - { id: 2, class: fpr64 }
  - { id: 3, class: fpr64 }
  - { id: 4, class: gpr64common }
  - { id: 5, class: fpr64 }
  - { id: 6, class: fpr64 }
  - { id: 7, class: fpr128 }
  - { id: 8, class: fpr128 }
  - { id: 9, class: fpr64 }
  - { id: 10, class: fpr64 }
liveins:
  - { reg: '$d0', virtual-reg: '%1' }
  - { reg: '$d1', virtual-reg: '%2' }
  - { reg: '$d2', virtual-reg: '%3' }
  - { reg: '$x0', virtual-reg: '%4' }
frameInfo:
  maxAlignment: 1
  maxCallFrameSize: 0
machineFunctionInfo: {}
body: |
  bb.0.entry:
    liveins: $d0, $d1, $d2, $x0

    %4:gpr64common = COPY $x0
    %3:fpr64 = COPY $d2
    %2:fpr64 = COPY $d1
    %1:fpr64 = COPY $d0
    %8:fpr128 = IMPLICIT_DEF
    %7:fpr128 = INSERT_SUBREG %8, %1, %subreg.dsub
    %6:fpr64 = COPY %3
    %5:fpr64 = COPY %2
    %0:fpr64 = DUPv2i32lane killed %7, 0

  bb.1.for.cond:
    %9:fpr64 = FMULv2f32 %0, %5
    %10:fpr64 = FADDv2f32 killed %9, %6
    STRDui killed %10, %4, 0 :: (store 8 into %ir.ret, align 16)
    B %bb.1

...
# CHECK-LABEL: name: indexed_2d
# CHECK: [[OP1COPY:%.*]]:fpr128 = COPY $q1
# CHECK: [[OP2:%.*]]:fpr128 = COPY $q0
# CHECK: [[OP1:%.*]]:fpr128 = COPY [[OP1COPY]]
# CHECK-NOT: FMULv2f64
# CHECK: :fpr128 = FMULv2i64_indexed [[OP1]], [[OP2]], 0
---
name: indexed_2d
alignment: 16
tracksRegLiveness: true
registers:
  - { id: 0, class: fpr128 }
  - { id: 1, class: fpr128 }
  - { id: 2, class: fpr128 }
  - { id: 3, class: fpr128 }
  - { id: 4, class: gpr64common }
  - { id: 5, class: fpr128 }
  - { id: 6, class: fpr128 }
  - { id: 7, class: fpr128 }
  - { id: 8, class: fpr128 }
liveins:
  - { reg: '$q0', virtual-reg: '%1' }
  - { reg: '$q1', virtual-reg: '%2' }
  - { reg: '$q2', virtual-reg: '%3' }
  - { reg: '$x0', virtual-reg: '%4' }
frameInfo:
  maxAlignment: 1
  maxCallFrameSize: 0
machineFunctionInfo: {}
body: |
  bb.0.entry:
    liveins: $q0, $q1, $q2, $x0

    %4:gpr64common = COPY $x0
    %3:fpr128 = COPY $q2
    %2:fpr128 = COPY $q1
    %1:fpr128 = COPY $q0
    %6:fpr128 = COPY %3
    %5:fpr128 = COPY %2
    %0:fpr128 = DUPv2i64lane %1, 0

  bb.1.for.cond:
    %7:fpr128 = FMULv2f64 %5, %0
    %8:fpr128 = FADDv2f64 killed %7, %6
    STRQui killed %8, %4, 0 :: (store 16 into %ir.ret)
    B %bb.1

...
# CHECK-LABEL: name: indexed_4s
# CHECK: [[OP1COPY:%.*]]:fpr128 = COPY $q1
# CHECK: [[OP2:%.*]]:fpr128 = COPY $q0
# CHECK: [[OP1:%.*]]:fpr128 = COPY [[OP1COPY]]
# CHECK-NOT: FMULv4f32
# CHECK: :fpr128 = FMULv4i32_indexed [[OP1]], [[OP2]], 0
---
name: indexed_4s
alignment: 16
tracksRegLiveness: true
registers:
  - { id: 0, class: fpr128 }
  - { id: 1, class: fpr128 }
  - { id: 2, class: fpr128 }
  - { id: 3, class: fpr128 }
  - { id: 4, class: gpr64common }
  - { id: 5, class: fpr128 }
  - { id: 6, class: fpr128 }
  - { id: 7, class: fpr128 }
  - { id: 8, class: fpr128 }
liveins:
  - { reg: '$q0', virtual-reg: '%1' }
  - { reg: '$q1', virtual-reg: '%2' }
  - { reg: '$q2', virtual-reg: '%3' }
  - { reg: '$x0', virtual-reg: '%4' }
frameInfo:
  maxAlignment: 1
  maxCallFrameSize: 0
machineFunctionInfo: {}
body: |
  bb.0.entry:
    liveins: $q0, $q1, $q2, $x0

    %4:gpr64common = COPY $x0
    %3:fpr128 = COPY $q2
    %2:fpr128 = COPY $q1
    %1:fpr128 = COPY $q0
    %6:fpr128 = COPY %3
    %5:fpr128 = COPY %2
    %0:fpr128 = DUPv4i32lane %1, 0

  bb.1.for.cond:
    %7:fpr128 = FMULv4f32 %5, %0
    %8:fpr128 = FADDv4f32 killed %7, %6
    STRQui killed %8, %4, 0 :: (store 16 into %ir.ret)
    B %bb.1

...
# CHECK-LABEL: name: indexed_4h
# CHECK: [[OP1:%.*]]:fpr64 = COPY $d1
# CHECK: [[OP2COPY:%.*]]:fpr64 = COPY $d0
# CHECK: [[UNDEF:%.*]]:fpr128 = IMPLICIT_DEF
# CHECK: [[OP2:%.*]]:fpr128 = INSERT_SUBREG [[UNDEF]], [[OP2COPY]], %subreg.dsub
# CHECK-NOT: FMULv4f16
# CHECK: :fpr64 = FMULv4i16_indexed [[OP1]], [[OP2]], 0
---
name: indexed_4h
alignment: 16
tracksRegLiveness: true
registers:
  - { id: 0, class: fpr64 }
  - { id: 1, class: fpr64 }
  - { id: 2, class: fpr64 }
  - { id: 3, class: fpr64 }
  - { id: 4, class: gpr64common }
  - { id: 5, class: fpr128 }
  - { id: 6, class: fpr128 }
  - { id: 7, class: fpr64 }
  - { id: 8, class: fpr64 }
liveins:
  - { reg: '$d0', virtual-reg: '%1' }
  - { reg: '$d1', virtual-reg: '%2' }
  - { reg: '$d2', virtual-reg: '%3' }
  - { reg: '$x0', virtual-reg: '%4' }
frameInfo:
  maxAlignment: 1
  maxCallFrameSize: 0
machineFunctionInfo: {}
body: |
  bb.0.entry:
    liveins: $d0, $d1, $d2, $x0

    %4:gpr64common = COPY $x0
    %3:fpr64 = COPY $d2
    %2:fpr64 = COPY $d1
    %1:fpr64 = COPY $d0
    %6:fpr128 = IMPLICIT_DEF
    %5:fpr128 = INSERT_SUBREG %6, %1, %subreg.dsub
    %0:fpr64 = DUPv4i16lane killed %5, 0

  bb.1.for.cond:
    %7:fpr64 = FMULv4f16 %2, %0
    %8:fpr64 = FADDv4f16 killed %7, %3
    STRDui killed %8, %4, 0 :: (store 8 into %ir.ret, align 16)
    B %bb.1

...
# CHECK-LABEL: name: indexed_8h
# CHECK: [[OP1:%.*]]:fpr128 = COPY $q1
# CHECK: [[OP2:%.*]]:fpr128 = COPY $q0
# CHECK-NOT: FMULv8f16
# CHECK: :fpr128 = FMULv8i16_indexed [[OP1]], [[OP2]], 0
---
name: indexed_8h
alignment: 16
tracksRegLiveness: true
registers:
  - { id: 0, class: fpr128 }
  - { id: 1, class: fpr128 }
  - { id: 2, class: fpr128 }
  - { id: 3, class: fpr128 }
  - { id: 4, class: gpr64common }
  - { id: 5, class: fpr128 }
  - { id: 6, class: fpr128 }
liveins:
  - { reg: '$q0', virtual-reg: '%1' }
  - { reg: '$q1', virtual-reg: '%2' }
  - { reg: '$q2', virtual-reg: '%3' }
  - { reg: '$x0', virtual-reg: '%4' }
frameInfo:
  maxAlignment: 1
  maxCallFrameSize: 0
machineFunctionInfo: {}
body: |
  bb.0.entry:
    liveins: $q0, $q1, $q2, $x0

    %4:gpr64common = COPY $x0
    %3:fpr128 = COPY $q2
    %2:fpr128 = COPY $q1
    %1:fpr128 = COPY $q0
    %0:fpr128 = DUPv8i16lane %1, 0

  bb.1.for.cond:
    %5:fpr128 = FMULv8f16 %2, %0
    %6:fpr128 = FADDv8f16 killed %5, %3
    STRQui killed %6, %4, 0 :: (store 16 into %ir.ret)
    B %bb.1

...