forked from OSchip/llvm-project
[X86] Add comment string for broadcast loads from the constant pool.
Summary: When broadcasting from the constant pool, it's useful to print out the final vector similar to what we do for normal moves from the constant pool. I changed only a couple tests that were broadcast focused. One of them had been previously hand-tweaked after running the script so that it could check the constant pool declaration. But I think this patch makes that unnecessary now since we can check the comment instead. Reviewers: spatel, RKSimon, zvi Reviewed By: spatel Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D34923 llvm-svn: 307062
This commit is contained in:
parent
49fc24a8bf
commit
ad140cfb68
|
@ -1332,6 +1332,32 @@ static std::string getShuffleComment(const MachineInstr *MI,
|
|||
return Comment;
|
||||
}
|
||||
|
||||
static void printConstant(const Constant *COp, raw_ostream &CS) {
|
||||
if (isa<UndefValue>(COp)) {
|
||||
CS << "u";
|
||||
} else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
|
||||
if (CI->getBitWidth() <= 64) {
|
||||
CS << CI->getZExtValue();
|
||||
} else {
|
||||
// print multi-word constant as (w0,w1)
|
||||
const auto &Val = CI->getValue();
|
||||
CS << "(";
|
||||
for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
|
||||
if (i > 0)
|
||||
CS << ",";
|
||||
CS << Val.getRawData()[i];
|
||||
}
|
||||
CS << ")";
|
||||
}
|
||||
} else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
|
||||
SmallString<32> Str;
|
||||
CF->getValueAPF().toString(Str);
|
||||
CS << Str;
|
||||
} else {
|
||||
CS << "?";
|
||||
}
|
||||
}
|
||||
|
||||
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
X86MCInstLower MCInstLowering(*MF, *this);
|
||||
const X86RegisterInfo *RI = MF->getSubtarget<X86Subtarget>().getRegisterInfo();
|
||||
|
@ -1766,59 +1792,73 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
|||
// For loads from a constant pool to a vector register, print the constant
|
||||
// loaded.
|
||||
CASE_ALL_MOV_RM()
|
||||
case X86::VBROADCASTF128:
|
||||
case X86::VBROADCASTI128:
|
||||
case X86::VBROADCASTF32X4Z256rm:
|
||||
case X86::VBROADCASTF32X4rm:
|
||||
case X86::VBROADCASTF32X8rm:
|
||||
case X86::VBROADCASTF64X2Z128rm:
|
||||
case X86::VBROADCASTF64X2rm:
|
||||
case X86::VBROADCASTF64X4rm:
|
||||
case X86::VBROADCASTI32X4Z256rm:
|
||||
case X86::VBROADCASTI32X4rm:
|
||||
case X86::VBROADCASTI32X8rm:
|
||||
case X86::VBROADCASTI64X2Z128rm:
|
||||
case X86::VBROADCASTI64X2rm:
|
||||
case X86::VBROADCASTI64X4rm:
|
||||
if (!OutStreamer->isVerboseAsm())
|
||||
break;
|
||||
if (MI->getNumOperands() <= 4)
|
||||
break;
|
||||
if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
|
||||
int NumLanes = 1;
|
||||
// Override NumLanes for the broadcast instructions.
|
||||
switch (MI->getOpcode()) {
|
||||
case X86::VBROADCASTF128: NumLanes = 2; break;
|
||||
case X86::VBROADCASTI128: NumLanes = 2; break;
|
||||
case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break;
|
||||
case X86::VBROADCASTF32X4rm: NumLanes = 4; break;
|
||||
case X86::VBROADCASTF32X8rm: NumLanes = 2; break;
|
||||
case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break;
|
||||
case X86::VBROADCASTF64X2rm: NumLanes = 4; break;
|
||||
case X86::VBROADCASTF64X4rm: NumLanes = 2; break;
|
||||
case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break;
|
||||
case X86::VBROADCASTI32X4rm: NumLanes = 4; break;
|
||||
case X86::VBROADCASTI32X8rm: NumLanes = 2; break;
|
||||
case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break;
|
||||
case X86::VBROADCASTI64X2rm: NumLanes = 4; break;
|
||||
case X86::VBROADCASTI64X4rm: NumLanes = 2; break;
|
||||
}
|
||||
|
||||
std::string Comment;
|
||||
raw_string_ostream CS(Comment);
|
||||
const MachineOperand &DstOp = MI->getOperand(0);
|
||||
CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
|
||||
if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
|
||||
CS << "[";
|
||||
for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) {
|
||||
if (i != 0)
|
||||
CS << ",";
|
||||
if (CDS->getElementType()->isIntegerTy())
|
||||
CS << CDS->getElementAsInteger(i);
|
||||
else if (CDS->getElementType()->isFloatTy())
|
||||
CS << CDS->getElementAsFloat(i);
|
||||
else if (CDS->getElementType()->isDoubleTy())
|
||||
CS << CDS->getElementAsDouble(i);
|
||||
else
|
||||
CS << "?";
|
||||
for (int l = 0; l != NumLanes; ++l) {
|
||||
for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) {
|
||||
if (i != 0 || l != 0)
|
||||
CS << ",";
|
||||
if (CDS->getElementType()->isIntegerTy())
|
||||
CS << CDS->getElementAsInteger(i);
|
||||
else if (CDS->getElementType()->isFloatTy())
|
||||
CS << CDS->getElementAsFloat(i);
|
||||
else if (CDS->getElementType()->isDoubleTy())
|
||||
CS << CDS->getElementAsDouble(i);
|
||||
else
|
||||
CS << "?";
|
||||
}
|
||||
}
|
||||
CS << "]";
|
||||
OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
|
||||
} else if (auto *CV = dyn_cast<ConstantVector>(C)) {
|
||||
CS << "<";
|
||||
for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) {
|
||||
if (i != 0)
|
||||
CS << ",";
|
||||
Constant *COp = CV->getOperand(i);
|
||||
if (isa<UndefValue>(COp)) {
|
||||
CS << "u";
|
||||
} else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
|
||||
if (CI->getBitWidth() <= 64) {
|
||||
CS << CI->getZExtValue();
|
||||
} else {
|
||||
// print multi-word constant as (w0,w1)
|
||||
const auto &Val = CI->getValue();
|
||||
CS << "(";
|
||||
for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
|
||||
if (i > 0)
|
||||
CS << ",";
|
||||
CS << Val.getRawData()[i];
|
||||
}
|
||||
CS << ")";
|
||||
}
|
||||
} else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
|
||||
SmallString<32> Str;
|
||||
CF->getValueAPF().toString(Str);
|
||||
CS << Str;
|
||||
} else {
|
||||
CS << "?";
|
||||
for (int l = 0; l != NumLanes; ++l) {
|
||||
for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) {
|
||||
if (i != 0 || l != 0)
|
||||
CS << ",";
|
||||
printConstant(CV->getOperand(i), CS);
|
||||
}
|
||||
}
|
||||
CS << ">";
|
||||
|
@ -1826,6 +1866,85 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
|||
}
|
||||
}
|
||||
break;
|
||||
case X86::VBROADCASTSSrm:
|
||||
case X86::VBROADCASTSSYrm:
|
||||
case X86::VBROADCASTSSZ128m:
|
||||
case X86::VBROADCASTSSZ256m:
|
||||
case X86::VBROADCASTSSZm:
|
||||
case X86::VBROADCASTSDYrm:
|
||||
case X86::VBROADCASTSDZ256m:
|
||||
case X86::VBROADCASTSDZm:
|
||||
case X86::VPBROADCASTBrm:
|
||||
case X86::VPBROADCASTBYrm:
|
||||
case X86::VPBROADCASTBZ128m:
|
||||
case X86::VPBROADCASTBZ256m:
|
||||
case X86::VPBROADCASTBZm:
|
||||
case X86::VPBROADCASTDrm:
|
||||
case X86::VPBROADCASTDYrm:
|
||||
case X86::VPBROADCASTDZ128m:
|
||||
case X86::VPBROADCASTDZ256m:
|
||||
case X86::VPBROADCASTDZm:
|
||||
case X86::VPBROADCASTQrm:
|
||||
case X86::VPBROADCASTQYrm:
|
||||
case X86::VPBROADCASTQZ128m:
|
||||
case X86::VPBROADCASTQZ256m:
|
||||
case X86::VPBROADCASTQZm:
|
||||
case X86::VPBROADCASTWrm:
|
||||
case X86::VPBROADCASTWYrm:
|
||||
case X86::VPBROADCASTWZ128m:
|
||||
case X86::VPBROADCASTWZ256m:
|
||||
case X86::VPBROADCASTWZm:
|
||||
if (!OutStreamer->isVerboseAsm())
|
||||
break;
|
||||
if (MI->getNumOperands() <= 4)
|
||||
break;
|
||||
if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
|
||||
int NumElts;
|
||||
switch (MI->getOpcode()) {
|
||||
default: llvm_unreachable("Invalid opcode");
|
||||
case X86::VBROADCASTSSrm: NumElts = 4; break;
|
||||
case X86::VBROADCASTSSYrm: NumElts = 8; break;
|
||||
case X86::VBROADCASTSSZ128m: NumElts = 4; break;
|
||||
case X86::VBROADCASTSSZ256m: NumElts = 8; break;
|
||||
case X86::VBROADCASTSSZm: NumElts = 16; break;
|
||||
case X86::VBROADCASTSDYrm: NumElts = 4; break;
|
||||
case X86::VBROADCASTSDZ256m: NumElts = 4; break;
|
||||
case X86::VBROADCASTSDZm: NumElts = 8; break;
|
||||
case X86::VPBROADCASTBrm: NumElts = 16; break;
|
||||
case X86::VPBROADCASTBYrm: NumElts = 32; break;
|
||||
case X86::VPBROADCASTBZ128m: NumElts = 16; break;
|
||||
case X86::VPBROADCASTBZ256m: NumElts = 32; break;
|
||||
case X86::VPBROADCASTBZm: NumElts = 64; break;
|
||||
case X86::VPBROADCASTDrm: NumElts = 4; break;
|
||||
case X86::VPBROADCASTDYrm: NumElts = 8; break;
|
||||
case X86::VPBROADCASTDZ128m: NumElts = 4; break;
|
||||
case X86::VPBROADCASTDZ256m: NumElts = 8; break;
|
||||
case X86::VPBROADCASTDZm: NumElts = 16; break;
|
||||
case X86::VPBROADCASTQrm: NumElts = 2; break;
|
||||
case X86::VPBROADCASTQYrm: NumElts = 4; break;
|
||||
case X86::VPBROADCASTQZ128m: NumElts = 2; break;
|
||||
case X86::VPBROADCASTQZ256m: NumElts = 4; break;
|
||||
case X86::VPBROADCASTQZm: NumElts = 8; break;
|
||||
case X86::VPBROADCASTWrm: NumElts = 8; break;
|
||||
case X86::VPBROADCASTWYrm: NumElts = 16; break;
|
||||
case X86::VPBROADCASTWZ128m: NumElts = 8; break;
|
||||
case X86::VPBROADCASTWZ256m: NumElts = 16; break;
|
||||
case X86::VPBROADCASTWZm: NumElts = 32; break;
|
||||
}
|
||||
|
||||
std::string Comment;
|
||||
raw_string_ostream CS(Comment);
|
||||
const MachineOperand &DstOp = MI->getOperand(0);
|
||||
CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
|
||||
CS << "[";
|
||||
for (int i = 0; i != NumElts; ++i) {
|
||||
if (i != 0)
|
||||
CS << ",";
|
||||
printConstant(C, CS);
|
||||
}
|
||||
CS << "]";
|
||||
OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
|
||||
}
|
||||
}
|
||||
|
||||
MCInst TmpInst;
|
||||
|
|
|
@ -2624,7 +2624,8 @@ define void @avg_v64i8_const(<64 x i8>* %a) {
|
|||
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
|
||||
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
|
||||
; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
|
||||
; AVX512F-NEXT: vbroadcasti64x4 {{.*#+}} zmm4 = mem[0,1,2,3,0,1,2,3]
|
||||
; AVX512F-NEXT: vbroadcasti64x4 {{.*#+}} zmm4 = [1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8]
|
||||
; AVX512F-NEXT: # zmm4 = mem[0,1,2,3,0,1,2,3]
|
||||
; AVX512F-NEXT: vpaddd %zmm4, %zmm3, %zmm3
|
||||
; AVX512F-NEXT: vpaddd %zmm4, %zmm2, %zmm2
|
||||
; AVX512F-NEXT: vpaddd %zmm4, %zmm1, %zmm1
|
||||
|
@ -2941,7 +2942,8 @@ define void @avg_v32i16_const(<32 x i16>* %a) {
|
|||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
|
||||
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
|
||||
; AVX512F-NEXT: vbroadcasti64x4 {{.*#+}} zmm2 = mem[0,1,2,3,0,1,2,3]
|
||||
; AVX512F-NEXT: vbroadcasti64x4 {{.*#+}} zmm2 = [1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8]
|
||||
; AVX512F-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3]
|
||||
; AVX512F-NEXT: vpaddd %zmm2, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vpaddd %zmm2, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpsrld $1, %zmm0, %zmm0
|
||||
|
|
|
@ -633,13 +633,13 @@ entry:
|
|||
define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
|
||||
; X32-AVX2-LABEL: V111:
|
||||
; X32-AVX2: ## BB#0: ## %entry
|
||||
; X32-AVX2-NEXT: vpbroadcastd LCPI29_0, %ymm1
|
||||
; X32-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
|
||||
; X32-AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
|
||||
; X32-AVX2-NEXT: retl
|
||||
;
|
||||
; X64-AVX2-LABEL: V111:
|
||||
; X64-AVX2: ## BB#0: ## %entry
|
||||
; X64-AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
|
||||
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
|
||||
; X64-AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
|
||||
; X64-AVX2-NEXT: retq
|
||||
;
|
||||
|
@ -660,13 +660,13 @@ entry:
|
|||
define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
|
||||
; X32-AVX2-LABEL: V113:
|
||||
; X32-AVX2: ## BB#0: ## %entry
|
||||
; X32-AVX2-NEXT: vbroadcastss LCPI30_0, %ymm1
|
||||
; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125]
|
||||
; X32-AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
|
||||
; X32-AVX2-NEXT: retl
|
||||
;
|
||||
; X64-AVX2-LABEL: V113:
|
||||
; X64-AVX2: ## BB#0: ## %entry
|
||||
; X64-AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
|
||||
; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125,-0.0078125]
|
||||
; X64-AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
|
||||
; X64-AVX2-NEXT: retq
|
||||
;
|
||||
|
@ -687,12 +687,12 @@ entry:
|
|||
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
|
||||
; X32-LABEL: _e2:
|
||||
; X32: ## BB#0:
|
||||
; X32-NEXT: vbroadcastss LCPI31_0, %xmm0
|
||||
; X32-NEXT: vbroadcastss {{.*#+}} xmm0 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125]
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: _e2:
|
||||
; X64: ## BB#0:
|
||||
; X64-NEXT: vbroadcastss {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: vbroadcastss {{.*#+}} xmm0 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125]
|
||||
; X64-NEXT: retq
|
||||
%vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
|
||||
%vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -290,19 +290,19 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
|
|||
;
|
||||
; HASWELL-LABEL: v4f32_no_estimate:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [4:0.50]
|
||||
; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
|
||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; HASWELL-NO-FMA-LABEL: v4f32_no_estimate:
|
||||
; HASWELL-NO-FMA: # BB#0:
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
|
||||
; HASWELL-NO-FMA-NEXT: vdivps %xmm0, %xmm1, %xmm0
|
||||
; HASWELL-NO-FMA-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v4f32_no_estimate:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50]
|
||||
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [4:0.50]
|
||||
; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
|
||||
; AVX512-NEXT: retq # sched: [1:1.00]
|
||||
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
||||
|
@ -361,7 +361,7 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
|
|||
; HASWELL-LABEL: v4f32_one_step:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
|
||||
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
|
||||
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
|
||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
||||
|
@ -370,7 +370,7 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
|
|||
; HASWELL-NO-FMA: # BB#0:
|
||||
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1
|
||||
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1]
|
||||
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0
|
||||
|
@ -379,7 +379,7 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
|
|||
; KNL-LABEL: v4f32_one_step:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
||||
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
|
||||
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
|
||||
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
|
||||
; KNL-NEXT: retq # sched: [1:1.00]
|
||||
|
@ -468,7 +468,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
|
|||
; HASWELL-LABEL: v4f32_two_step:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
|
||||
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
|
||||
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
|
||||
|
@ -480,7 +480,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
|
|||
; HASWELL-NO-FMA: # BB#0:
|
||||
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1
|
||||
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm3
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1]
|
||||
; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2
|
||||
; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2
|
||||
; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1
|
||||
|
@ -493,7 +493,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
|
|||
; KNL-LABEL: v4f32_two_step:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
||||
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
|
||||
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
|
||||
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
|
||||
|
@ -504,7 +504,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
|
|||
; SKX-LABEL: v4f32_two_step:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
|
||||
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
||||
; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
|
||||
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
|
||||
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
|
||||
|
@ -552,19 +552,19 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
|
|||
;
|
||||
; HASWELL-LABEL: v8f32_no_estimate:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||
; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00]
|
||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; HASWELL-NO-FMA-LABEL: v8f32_no_estimate:
|
||||
; HASWELL-NO-FMA: # BB#0:
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
|
||||
; HASWELL-NO-FMA-NEXT: vdivps %ymm0, %ymm1, %ymm0
|
||||
; HASWELL-NO-FMA-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v8f32_no_estimate:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00]
|
||||
; AVX512-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||
; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00]
|
||||
; AVX512-NEXT: retq # sched: [1:1.00]
|
||||
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
||||
|
@ -630,7 +630,7 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
|
|||
; HASWELL-LABEL: v8f32_one_step:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
|
||||
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
|
||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
||||
|
@ -639,7 +639,7 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
|
|||
; HASWELL-NO-FMA: # BB#0:
|
||||
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1
|
||||
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
|
||||
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0
|
||||
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0
|
||||
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0
|
||||
|
@ -648,7 +648,7 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
|
|||
; KNL-LABEL: v8f32_one_step:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
|
||||
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
|
||||
; KNL-NEXT: retq # sched: [1:1.00]
|
||||
|
@ -750,7 +750,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
|
|||
; HASWELL-LABEL: v8f32_two_step:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||
; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
||||
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
|
||||
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
|
||||
|
@ -762,7 +762,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
|
|||
; HASWELL-NO-FMA: # BB#0:
|
||||
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1
|
||||
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm3
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
|
||||
; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2
|
||||
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2
|
||||
; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
||||
|
@ -775,7 +775,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
|
|||
; KNL-LABEL: v8f32_two_step:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||
; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
||||
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
|
||||
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
|
||||
|
@ -786,7 +786,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
|
|||
; SKX-LABEL: v8f32_two_step:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
|
||||
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||
; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
||||
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
|
||||
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
|
||||
|
|
|
@ -415,7 +415,7 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
|
|||
; HASWELL-LABEL: v4f32_one_step2:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
|
||||
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
|
||||
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
|
||||
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
|
||||
|
@ -425,7 +425,7 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
|
|||
; HASWELL-NO-FMA: # BB#0:
|
||||
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
|
||||
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
||||
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||
|
@ -435,7 +435,7 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
|
|||
; KNL-LABEL: v4f32_one_step2:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
||||
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
|
||||
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
|
||||
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
|
||||
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
|
||||
|
@ -514,7 +514,7 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
|
|||
; HASWELL-LABEL: v4f32_one_step_2_divs:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
|
||||
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
|
||||
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
|
||||
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
|
||||
|
@ -525,7 +525,7 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
|
|||
; HASWELL-NO-FMA: # BB#0:
|
||||
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
|
||||
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
||||
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||
|
@ -536,7 +536,7 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
|
|||
; KNL-LABEL: v4f32_one_step_2_divs:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
||||
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
|
||||
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
|
||||
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
|
||||
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
|
||||
|
@ -635,7 +635,7 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
|
|||
; HASWELL-LABEL: v4f32_two_step2:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
|
||||
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
|
||||
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
|
||||
|
@ -648,7 +648,7 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
|
|||
; HASWELL-NO-FMA: # BB#0:
|
||||
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm3 # sched: [4:0.50]
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1] sched: [4:0.50]
|
||||
; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
|
||||
; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
||||
|
@ -662,7 +662,7 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
|
|||
; KNL-LABEL: v4f32_two_step2:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
||||
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
|
||||
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
|
||||
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
|
||||
|
@ -674,7 +674,7 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
|
|||
; SKX-LABEL: v4f32_two_step2:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
|
||||
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
||||
; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
|
||||
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
|
||||
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
|
||||
|
@ -751,7 +751,7 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
|
|||
; HASWELL-LABEL: v8f32_one_step2:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
|
||||
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
|
||||
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
|
||||
|
@ -761,7 +761,7 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
|
|||
; HASWELL-NO-FMA: # BB#0:
|
||||
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||
|
@ -771,7 +771,7 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
|
|||
; KNL-LABEL: v8f32_one_step2:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
|
||||
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
|
||||
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
|
||||
|
@ -859,7 +859,7 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
|
|||
; HASWELL-LABEL: v8f32_one_step_2_divs:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
|
||||
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
|
||||
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
|
||||
|
@ -870,7 +870,7 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
|
|||
; HASWELL-NO-FMA: # BB#0:
|
||||
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||
|
@ -881,7 +881,7 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
|
|||
; KNL-LABEL: v8f32_one_step_2_divs:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
|
||||
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
|
||||
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
|
||||
|
@ -994,7 +994,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
|
|||
; HASWELL-LABEL: v8f32_two_step2:
|
||||
; HASWELL: # BB#0:
|
||||
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||
; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
||||
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
|
||||
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
|
||||
|
@ -1007,7 +1007,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
|
|||
; HASWELL-NO-FMA: # BB#0:
|
||||
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm3 # sched: [5:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
|
||||
; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
|
||||
|
@ -1021,7 +1021,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
|
|||
; KNL-LABEL: v8f32_two_step2:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||
; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
||||
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
|
||||
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
|
||||
|
@ -1033,7 +1033,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
|
|||
; SKX-LABEL: v8f32_two_step2:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
|
||||
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||
; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
||||
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
|
||||
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
|
||||
|
|
|
@ -153,14 +153,16 @@ define <32 x i16> @test7(<32 x i16> %a) {
|
|||
;
|
||||
; AVX2-LABEL: test7:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
|
||||
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [2,2,4,8,128,1,512,2048,2,2,4,8,128,1,512,2048]
|
||||
; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
|
||||
; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw %ymm2, %ymm1, %ymm1
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test7:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
|
||||
; AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [2,2,4,8,128,1,512,2048,2,2,4,8,128,1,512,2048]
|
||||
; AVX512-NEXT: # ymm2 = mem[0,1,0,1]
|
||||
; AVX512-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpmullw %ymm2, %ymm1, %ymm1
|
||||
; AVX512-NEXT: retq
|
||||
|
@ -183,7 +185,8 @@ define <16 x i32> @test8(<16 x i32> %a) {
|
|||
;
|
||||
; AVX2-LABEL: test8:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
|
||||
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [1,1,2,3,1,1,2,3]
|
||||
; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
|
||||
; AVX2-NEXT: vpsllvd %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsllvd %ymm2, %ymm1, %ymm1
|
||||
; AVX2-NEXT: retq
|
||||
|
|
Loading…
Reference in New Issue