forked from OSchip/llvm-project
Shrink the code and, as a free side effect, add the AVX {MIN,MAX}P{S,D}{rm,rr} instructions
llvm-svn: 106366
This commit is contained in:
parent
c60cecd88b
commit
1e205f6b1c
|
@@ -788,6 +788,24 @@ multiclass sse12_fp_binop_rm<bits<8> opc, string OpcodeStr,
|
|||
defm V#NAME#SD : sse12_fp_scalar<opc,
|
||||
!strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
OpNode, FR64, f64mem>, XD, VEX_4V;
|
||||
|
||||
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr,
|
||||
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), OpNode,
|
||||
VR128, v4f32, f128mem, memopv4f32, SSEPackedSingle>,
|
||||
VEX_4V;
|
||||
|
||||
defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr,
|
||||
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), OpNode,
|
||||
VR128, v2f64, f128mem, memopv2f64, SSEPackedDouble>,
|
||||
OpSize, VEX_4V;
|
||||
|
||||
defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
|
||||
!strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
"", "_ss", ssmem, sse_load_f32>, XS, VEX_4V;
|
||||
|
||||
defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
|
||||
!strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
"2", "_sd", sdmem, sse_load_f64>, XD, VEX_4V;
|
||||
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
|
@@ -798,72 +816,23 @@ multiclass sse12_fp_binop_rm<bits<8> opc, string OpcodeStr,
|
|||
defm SD : sse12_fp_scalar<opc,
|
||||
!strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
|
||||
OpNode, FR64, f64mem>, XD;
|
||||
}
|
||||
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr,
|
||||
"ps\t{$src2, $dst|$dst, $src2}"), OpNode, VR128, v4f32,
|
||||
f128mem, memopv4f32, SSEPackedSingle>, TB;
|
||||
|
||||
// Vector operation, reg+reg.
|
||||
def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr,
|
||||
"pd\t{$src2, $dst|$dst, $src2}"), OpNode, VR128, v2f64,
|
||||
f128mem, memopv2f64, SSEPackedDouble>, TB, OpSize;
|
||||
|
||||
def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
|
||||
// Vector operation, reg+mem.
|
||||
def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, f128mem:$src2),
|
||||
!strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
|
||||
|
||||
def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, f128mem:$src2),
|
||||
!strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
|
||||
|
||||
// Intrinsic operation, reg+reg.
|
||||
def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
|
||||
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse_",
|
||||
!strconcat(OpcodeStr, "_ss")) VR128:$src1,
|
||||
VR128:$src2))]> {
|
||||
// int_x86_sse_xxx_ss
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
"", "_ss", ssmem, sse_load_f32>, XS;
|
||||
|
||||
def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
|
||||
!strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse2_",
|
||||
!strconcat(OpcodeStr, "_sd")) VR128:$src1,
|
||||
VR128:$src2))]> {
|
||||
// int_x86_sse2_xxx_sd
|
||||
let isCommutable = Commutable;
|
||||
"2", "_sd", sdmem, sse_load_f64>, XD;
|
||||
}
|
||||
|
||||
// Intrinsic operation, reg+mem.
|
||||
def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, ssmem:$src2),
|
||||
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse_",
|
||||
!strconcat(OpcodeStr, "_ss")) VR128:$src1,
|
||||
sse_load_f32:$src2))]>;
|
||||
// int_x86_sse_xxx_ss
|
||||
|
||||
def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, sdmem:$src2),
|
||||
!strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (!nameconcat<Intrinsic>("int_x86_sse2_",
|
||||
!strconcat(OpcodeStr, "_sd")) VR128:$src1,
|
||||
sse_load_f64:$src2))]>;
|
||||
// int_x86_sse2_xxx_sd
|
||||
|
||||
// Vector intrinsic operation, reg+reg.
|
||||
def PSrr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
|
|
|
@@ -10212,3 +10212,35 @@ pshufb CPI1_0(%rip), %xmm1
|
|||
// CHECK: encoding: [0xc5,0xeb,0x5d,0x6c,0xcb,0xfc]
|
||||
vminsd -4(%ebx,%ecx,8), %xmm2, %xmm5
|
||||
|
||||
// CHECK: vmaxps %xmm2, %xmm4, %xmm6
|
||||
// CHECK: encoding: [0xc5,0xd8,0x5f,0xf2]
|
||||
vmaxps %xmm2, %xmm4, %xmm6
|
||||
|
||||
// CHECK: vmaxpd %xmm2, %xmm4, %xmm6
|
||||
// CHECK: encoding: [0xc5,0xd9,0x5f,0xf2]
|
||||
vmaxpd %xmm2, %xmm4, %xmm6
|
||||
|
||||
// CHECK: vminps %xmm2, %xmm4, %xmm6
|
||||
// CHECK: encoding: [0xc5,0xd8,0x5d,0xf2]
|
||||
vminps %xmm2, %xmm4, %xmm6
|
||||
|
||||
// CHECK: vminpd %xmm2, %xmm4, %xmm6
|
||||
// CHECK: encoding: [0xc5,0xd9,0x5d,0xf2]
|
||||
vminpd %xmm2, %xmm4, %xmm6
|
||||
|
||||
// CHECK: vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5
|
||||
// CHECK: encoding: [0xc5,0xe8,0x5f,0x6c,0xcb,0xfc]
|
||||
vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5
|
||||
|
||||
// CHECK: vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5
|
||||
// CHECK: encoding: [0xc5,0xe9,0x5f,0x6c,0xcb,0xfc]
|
||||
vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5
|
||||
|
||||
// CHECK: vminps -4(%ebx,%ecx,8), %xmm2, %xmm5
|
||||
// CHECK: encoding: [0xc5,0xe8,0x5d,0x6c,0xcb,0xfc]
|
||||
vminps -4(%ebx,%ecx,8), %xmm2, %xmm5
|
||||
|
||||
// CHECK: vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5
|
||||
// CHECK: encoding: [0xc5,0xe9,0x5d,0x6c,0xcb,0xfc]
|
||||
vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5
|
||||
|
||||
|
|
|
@@ -264,3 +264,35 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11
|
|||
// CHECK: encoding: [0xc5,0x1b,0x5d,0x54,0xcb,0xfc]
|
||||
vminsd -4(%rbx,%rcx,8), %xmm12, %xmm10
|
||||
|
||||
// CHECK: vmaxps %xmm10, %xmm14, %xmm12
|
||||
// CHECK: encoding: [0xc4,0x41,0x08,0x5f,0xe2]
|
||||
vmaxps %xmm10, %xmm14, %xmm12
|
||||
|
||||
// CHECK: vmaxpd %xmm10, %xmm14, %xmm12
|
||||
// CHECK: encoding: [0xc4,0x41,0x09,0x5f,0xe2]
|
||||
vmaxpd %xmm10, %xmm14, %xmm12
|
||||
|
||||
// CHECK: vminps %xmm10, %xmm14, %xmm12
|
||||
// CHECK: encoding: [0xc4,0x41,0x08,0x5d,0xe2]
|
||||
vminps %xmm10, %xmm14, %xmm12
|
||||
|
||||
// CHECK: vminpd %xmm10, %xmm14, %xmm12
|
||||
// CHECK: encoding: [0xc4,0x41,0x09,0x5d,0xe2]
|
||||
vminpd %xmm10, %xmm14, %xmm12
|
||||
|
||||
// CHECK: vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10
|
||||
// CHECK: encoding: [0xc5,0x18,0x5f,0x54,0xcb,0xfc]
|
||||
vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10
|
||||
|
||||
// CHECK: vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10
|
||||
// CHECK: encoding: [0xc5,0x19,0x5f,0x54,0xcb,0xfc]
|
||||
vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10
|
||||
|
||||
// CHECK: vminps -4(%rbx,%rcx,8), %xmm12, %xmm10
|
||||
// CHECK: encoding: [0xc5,0x18,0x5d,0x54,0xcb,0xfc]
|
||||
vminps -4(%rbx,%rcx,8), %xmm12, %xmm10
|
||||
|
||||
// CHECK: vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10
|
||||
// CHECK: encoding: [0xc5,0x19,0x5d,0x54,0xcb,0xfc]
|
||||
vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10
|
||||
|
||||
|
|
Loading…
Reference in New Issue