forked from OSchip/llvm-project
[X86][MMX] Reapply: Add MMX instructions to foldable tables
Reapply r230248. Teach the peephole optimizer to work with MMX instructions by adding entries into the foldable tables. This covers folding opportunities not handled during isel. llvm-svn: 230499
This commit is contained in:
parent
48b10681f9
commit
ab7afa9144
|
@@ -547,6 +547,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::UCOMISDrr, X86::UCOMISDrm, 0 },
|
||||
{ X86::UCOMISSrr, X86::UCOMISSrm, 0 },
|
||||
|
||||
// MMX version of foldable instructions
|
||||
{ X86::MMX_CVTPD2PIirr, X86::MMX_CVTPD2PIirm, 0 },
|
||||
{ X86::MMX_CVTPI2PDirr, X86::MMX_CVTPI2PDirm, 0 },
|
||||
{ X86::MMX_CVTPS2PIirr, X86::MMX_CVTPS2PIirm, 0 },
|
||||
{ X86::MMX_CVTTPD2PIirr, X86::MMX_CVTTPD2PIirm, 0 },
|
||||
{ X86::MMX_CVTTPS2PIirr, X86::MMX_CVTTPS2PIirm, 0 },
|
||||
{ X86::MMX_MOVD64to64rr, X86::MMX_MOVQ64rm, 0 },
|
||||
{ X86::MMX_PABSBrr64, X86::MMX_PABSBrm64, 0 },
|
||||
{ X86::MMX_PABSDrr64, X86::MMX_PABSDrm64, 0 },
|
||||
{ X86::MMX_PABSWrr64, X86::MMX_PABSWrm64, 0 },
|
||||
{ X86::MMX_PSHUFWri, X86::MMX_PSHUFWmi, 0 },
|
||||
|
||||
// AVX 128-bit versions of foldable instructions
|
||||
{ X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 },
|
||||
{ X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 },
|
||||
|
@@ -1117,6 +1129,78 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::XORPDrr, X86::XORPDrm, TB_ALIGN_16 },
|
||||
{ X86::XORPSrr, X86::XORPSrm, TB_ALIGN_16 },
|
||||
|
||||
// MMX version of foldable instructions
|
||||
{ X86::MMX_CVTPI2PSirr, X86::MMX_CVTPI2PSirm, 0 },
|
||||
{ X86::MMX_PACKSSDWirr, X86::MMX_PACKSSDWirm, 0 },
|
||||
{ X86::MMX_PACKSSWBirr, X86::MMX_PACKSSWBirm, 0 },
|
||||
{ X86::MMX_PACKUSWBirr, X86::MMX_PACKUSWBirm, 0 },
|
||||
{ X86::MMX_PADDBirr, X86::MMX_PADDBirm, 0 },
|
||||
{ X86::MMX_PADDDirr, X86::MMX_PADDDirm, 0 },
|
||||
{ X86::MMX_PADDQirr, X86::MMX_PADDQirm, 0 },
|
||||
{ X86::MMX_PADDSBirr, X86::MMX_PADDSBirm, 0 },
|
||||
{ X86::MMX_PADDSWirr, X86::MMX_PADDSWirm, 0 },
|
||||
{ X86::MMX_PADDUSBirr, X86::MMX_PADDUSBirm, 0 },
|
||||
{ X86::MMX_PADDUSWirr, X86::MMX_PADDUSWirm, 0 },
|
||||
{ X86::MMX_PADDWirr, X86::MMX_PADDWirm, 0 },
|
||||
{ X86::MMX_PALIGNR64irr, X86::MMX_PALIGNR64irm, 0 },
|
||||
{ X86::MMX_PANDNirr, X86::MMX_PANDNirm, 0 },
|
||||
{ X86::MMX_PANDirr, X86::MMX_PANDirm, 0 },
|
||||
{ X86::MMX_PAVGBirr, X86::MMX_PAVGBirm, 0 },
|
||||
{ X86::MMX_PAVGWirr, X86::MMX_PAVGWirm, 0 },
|
||||
{ X86::MMX_PCMPEQBirr, X86::MMX_PCMPEQBirm, 0 },
|
||||
{ X86::MMX_PCMPEQDirr, X86::MMX_PCMPEQDirm, 0 },
|
||||
{ X86::MMX_PCMPEQWirr, X86::MMX_PCMPEQWirm, 0 },
|
||||
{ X86::MMX_PCMPGTBirr, X86::MMX_PCMPGTBirm, 0 },
|
||||
{ X86::MMX_PCMPGTDirr, X86::MMX_PCMPGTDirm, 0 },
|
||||
{ X86::MMX_PCMPGTWirr, X86::MMX_PCMPGTWirm, 0 },
|
||||
{ X86::MMX_PHADDSWrr64, X86::MMX_PHADDSWrm64, 0 },
|
||||
{ X86::MMX_PHADDWrr64, X86::MMX_PHADDWrm64, 0 },
|
||||
{ X86::MMX_PHADDrr64, X86::MMX_PHADDrm64, 0 },
|
||||
{ X86::MMX_PHSUBDrr64, X86::MMX_PHSUBDrm64, 0 },
|
||||
{ X86::MMX_PHSUBSWrr64, X86::MMX_PHSUBSWrm64, 0 },
|
||||
{ X86::MMX_PHSUBWrr64, X86::MMX_PHSUBWrm64, 0 },
|
||||
{ X86::MMX_PINSRWirri, X86::MMX_PINSRWirmi, 0 },
|
||||
{ X86::MMX_PMADDUBSWrr64, X86::MMX_PMADDUBSWrm64, 0 },
|
||||
{ X86::MMX_PMADDWDirr, X86::MMX_PMADDWDirm, 0 },
|
||||
{ X86::MMX_PMAXSWirr, X86::MMX_PMAXSWirm, 0 },
|
||||
{ X86::MMX_PMAXUBirr, X86::MMX_PMAXUBirm, 0 },
|
||||
{ X86::MMX_PMINSWirr, X86::MMX_PMINSWirm, 0 },
|
||||
{ X86::MMX_PMINUBirr, X86::MMX_PMINUBirm, 0 },
|
||||
{ X86::MMX_PMULHRSWrr64, X86::MMX_PMULHRSWrm64, 0 },
|
||||
{ X86::MMX_PMULHUWirr, X86::MMX_PMULHUWirm, 0 },
|
||||
{ X86::MMX_PMULHWirr, X86::MMX_PMULHWirm, 0 },
|
||||
{ X86::MMX_PMULLWirr, X86::MMX_PMULLWirm, 0 },
|
||||
{ X86::MMX_PMULUDQirr, X86::MMX_PMULUDQirm, 0 },
|
||||
{ X86::MMX_PORirr, X86::MMX_PORirm, 0 },
|
||||
{ X86::MMX_PSADBWirr, X86::MMX_PSADBWirm, 0 },
|
||||
{ X86::MMX_PSHUFBrr64, X86::MMX_PSHUFBrm64, 0 },
|
||||
{ X86::MMX_PSIGNBrr64, X86::MMX_PSIGNBrm64, 0 },
|
||||
{ X86::MMX_PSIGNDrr64, X86::MMX_PSIGNDrm64, 0 },
|
||||
{ X86::MMX_PSIGNWrr64, X86::MMX_PSIGNWrm64, 0 },
|
||||
{ X86::MMX_PSLLDrr, X86::MMX_PSLLDrm, 0 },
|
||||
{ X86::MMX_PSLLQrr, X86::MMX_PSLLQrm, 0 },
|
||||
{ X86::MMX_PSLLWrr, X86::MMX_PSLLWrm, 0 },
|
||||
{ X86::MMX_PSRADrr, X86::MMX_PSRADrm, 0 },
|
||||
{ X86::MMX_PSRAWrr, X86::MMX_PSRAWrm, 0 },
|
||||
{ X86::MMX_PSRLDrr, X86::MMX_PSRLDrm, 0 },
|
||||
{ X86::MMX_PSRLQrr, X86::MMX_PSRLQrm, 0 },
|
||||
{ X86::MMX_PSRLWrr, X86::MMX_PSRLWrm, 0 },
|
||||
{ X86::MMX_PSUBBirr, X86::MMX_PSUBBirm, 0 },
|
||||
{ X86::MMX_PSUBDirr, X86::MMX_PSUBDirm, 0 },
|
||||
{ X86::MMX_PSUBQirr, X86::MMX_PSUBQirm, 0 },
|
||||
{ X86::MMX_PSUBSBirr, X86::MMX_PSUBSBirm, 0 },
|
||||
{ X86::MMX_PSUBSWirr, X86::MMX_PSUBSWirm, 0 },
|
||||
{ X86::MMX_PSUBUSBirr, X86::MMX_PSUBUSBirm, 0 },
|
||||
{ X86::MMX_PSUBUSWirr, X86::MMX_PSUBUSWirm, 0 },
|
||||
{ X86::MMX_PSUBWirr, X86::MMX_PSUBWirm, 0 },
|
||||
{ X86::MMX_PUNPCKHBWirr, X86::MMX_PUNPCKHBWirm, 0 },
|
||||
{ X86::MMX_PUNPCKHDQirr, X86::MMX_PUNPCKHDQirm, 0 },
|
||||
{ X86::MMX_PUNPCKHWDirr, X86::MMX_PUNPCKHWDirm, 0 },
|
||||
{ X86::MMX_PUNPCKLBWirr, X86::MMX_PUNPCKLBWirm, 0 },
|
||||
{ X86::MMX_PUNPCKLDQirr, X86::MMX_PUNPCKLDQirm, 0 },
|
||||
{ X86::MMX_PUNPCKLWDirr, X86::MMX_PUNPCKLWDirm, 0 },
|
||||
{ X86::MMX_PXORirr, X86::MMX_PXORirm, 0 },
|
||||
|
||||
// AVX 128-bit versions of foldable instructions
|
||||
{ X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 },
|
||||
{ X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, 0 },
|
||||
|
|
|
@@ -135,3 +135,148 @@ entry:
|
|||
ret i64 %4
|
||||
}
|
||||
declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32)
|
||||
|
||||
; tt0: load-folding test for the llvm.x86.mmx.padd.b intrinsic.
; The value loaded from %q is passed as the second operand of the intrinsic.
; The FileCheck directives below expect paddb to take that value as a memory
; operand, and they capture the address register as REG3.
define i64 @tt0(x86_mmx %t, x86_mmx* %q) {
|
||||
; CHECK-LABEL: tt0:
|
||||
; CHECK: # BB#0:{{.*}} %entry
|
||||
; CHECK: paddb (%[[REG3:[a-z]+]]), %mm0
|
||||
; CHECK-NEXT: movd %mm0, %rax
|
||||
; CHECK-NEXT: emms
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
; This is the load the directives above expect as paddb's memory operand.
%v = load x86_mmx* %q
|
||||
%u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %v)
|
||||
; The MMX result is returned as an i64 (the movd to %rax matched above).
%s = bitcast x86_mmx %u to i64
|
||||
call void @llvm.x86.mmx.emms()
|
||||
ret i64 %s
|
||||
}
|
||||
declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
|
||||
declare void @llvm.x86.mmx.emms()
|
||||
|
||||
; tt1: load-folding test for the llvm.x86.mmx.padd.w intrinsic.
; The value loaded from %q is the second operand of the intrinsic; the
; directives below expect paddw to take it as a memory operand.
; NOTE(review): the address register is matched with REG3, which is captured
; in tt0 rather than here -- confirm the FileCheck run keeps variables alive
; across labels.
define i64 @tt1(x86_mmx %t, x86_mmx* %q) {
|
||||
; CHECK-LABEL: tt1:
|
||||
; CHECK: # BB#0:{{.*}} %entry
|
||||
; CHECK: paddw (%[[REG3]]), %mm0
|
||||
; CHECK-NEXT: movd %mm0, %rax
|
||||
; CHECK-NEXT: emms
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
; This is the load the directives above expect as paddw's memory operand.
%v = load x86_mmx* %q
|
||||
%u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %v)
|
||||
; The MMX result is returned as an i64 (the movd to %rax matched above).
%s = bitcast x86_mmx %u to i64
|
||||
call void @llvm.x86.mmx.emms()
|
||||
ret i64 %s
|
||||
}
|
||||
declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
|
||||
|
||||
; tt2: load-folding test for the llvm.x86.mmx.padd.d intrinsic.
; The value loaded from %q is the second operand of the intrinsic; the
; directives below expect paddd to take it as a memory operand.
; NOTE(review): the address register is matched with REG3, which is captured
; in tt0 rather than here -- confirm the FileCheck run keeps variables alive
; across labels.
define i64 @tt2(x86_mmx %t, x86_mmx* %q) {
|
||||
; CHECK-LABEL: tt2:
|
||||
; CHECK: # BB#0:{{.*}} %entry
|
||||
; CHECK: paddd (%[[REG3]]), %mm0
|
||||
; CHECK-NEXT: movd %mm0, %rax
|
||||
; CHECK-NEXT: emms
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
; This is the load the directives above expect as paddd's memory operand.
%v = load x86_mmx* %q
|
||||
%u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %v)
|
||||
; The MMX result is returned as an i64 (the movd to %rax matched above).
%s = bitcast x86_mmx %u to i64
|
||||
call void @llvm.x86.mmx.emms()
|
||||
ret i64 %s
|
||||
}
|
||||
declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
|
||||
|
||||
; tt3: load-folding test for the llvm.x86.mmx.padd.q intrinsic.
; The value loaded from %q is the second operand of the intrinsic; the
; directives below expect paddq to take it as a memory operand.
; NOTE(review): the address register is matched with REG3, which is captured
; in tt0 rather than here -- confirm the FileCheck run keeps variables alive
; across labels.
define i64 @tt3(x86_mmx %t, x86_mmx* %q) {
|
||||
; CHECK-LABEL: tt3:
|
||||
; CHECK: # BB#0:{{.*}} %entry
|
||||
; CHECK: paddq (%[[REG3]]), %mm0
|
||||
; CHECK-NEXT: movd %mm0, %rax
|
||||
; CHECK-NEXT: emms
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
; This is the load the directives above expect as paddq's memory operand.
%v = load x86_mmx* %q
|
||||
%u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %v)
|
||||
; The MMX result is returned as an i64 (the movd to %rax matched above).
%s = bitcast x86_mmx %u to i64
|
||||
call void @llvm.x86.mmx.emms()
|
||||
ret i64 %s
|
||||
}
|
||||
declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
|
||||
|
||||
; tt4: load-folding test for the llvm.x86.mmx.paddus.b intrinsic.
; The value loaded from %q is the second operand of the intrinsic; the
; directives below expect paddusb to take it as a memory operand.
; NOTE(review): the address register is matched with REG3, which is captured
; in tt0 rather than here -- confirm the FileCheck run keeps variables alive
; across labels.
define i64 @tt4(x86_mmx %t, x86_mmx* %q) {
|
||||
; CHECK-LABEL: tt4:
|
||||
; CHECK: # BB#0:{{.*}} %entry
|
||||
; CHECK: paddusb (%[[REG3]]), %mm0
|
||||
; CHECK-NEXT: movd %mm0, %rax
|
||||
; CHECK-NEXT: emms
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
; This is the load the directives above expect as paddusb's memory operand.
%v = load x86_mmx* %q
|
||||
%u = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %t, x86_mmx %v)
|
||||
; The MMX result is returned as an i64 (the movd to %rax matched above).
%s = bitcast x86_mmx %u to i64
|
||||
call void @llvm.x86.mmx.emms()
|
||||
ret i64 %s
|
||||
}
|
||||
declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx)
|
||||
|
||||
; tt5: load-folding test for the llvm.x86.mmx.paddus.w intrinsic.
; The value loaded from %q is the second operand of the intrinsic; the
; directives below expect paddusw to take it as a memory operand.
; NOTE(review): the address register is matched with REG3, which is captured
; in tt0 rather than here -- confirm the FileCheck run keeps variables alive
; across labels.
define i64 @tt5(x86_mmx %t, x86_mmx* %q) {
|
||||
; CHECK-LABEL: tt5:
|
||||
; CHECK: # BB#0:{{.*}} %entry
|
||||
; CHECK: paddusw (%[[REG3]]), %mm0
|
||||
; CHECK-NEXT: movd %mm0, %rax
|
||||
; CHECK-NEXT: emms
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
; This is the load the directives above expect as paddusw's memory operand.
%v = load x86_mmx* %q
|
||||
%u = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %t, x86_mmx %v)
|
||||
; The MMX result is returned as an i64 (the movd to %rax matched above).
%s = bitcast x86_mmx %u to i64
|
||||
call void @llvm.x86.mmx.emms()
|
||||
ret i64 %s
|
||||
}
|
||||
declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
|
||||
|
||||
; tt6: load-folding test for the llvm.x86.mmx.psrl.w intrinsic.
; The value loaded from %q is the second operand of the intrinsic; the
; directives below expect psrlw to take it as a memory operand.
; NOTE(review): the address register is matched with REG3, which is captured
; in tt0 rather than here -- confirm the FileCheck run keeps variables alive
; across labels.
define i64 @tt6(x86_mmx %t, x86_mmx* %q) {
|
||||
; CHECK-LABEL: tt6:
|
||||
; CHECK: # BB#0:{{.*}} %entry
|
||||
; CHECK: psrlw (%[[REG3]]), %mm0
|
||||
; CHECK-NEXT: movd %mm0, %rax
|
||||
; CHECK-NEXT: emms
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
; This is the load the directives above expect as psrlw's memory operand.
%v = load x86_mmx* %q
|
||||
%u = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %t, x86_mmx %v)
|
||||
; The MMX result is returned as an i64 (the movd to %rax matched above).
%s = bitcast x86_mmx %u to i64
|
||||
call void @llvm.x86.mmx.emms()
|
||||
ret i64 %s
|
||||
}
|
||||
declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx)
|
||||
|
||||
; tt7: load-folding test for the llvm.x86.mmx.psrl.d intrinsic.
; The value loaded from %q is the second operand of the intrinsic; the
; directives below expect psrld to take it as a memory operand.
; NOTE(review): the address register is matched with REG3, which is captured
; in tt0 rather than here -- confirm the FileCheck run keeps variables alive
; across labels.
define i64 @tt7(x86_mmx %t, x86_mmx* %q) {
|
||||
; CHECK-LABEL: tt7:
|
||||
; CHECK: # BB#0:{{.*}} %entry
|
||||
; CHECK: psrld (%[[REG3]]), %mm0
|
||||
; CHECK-NEXT: movd %mm0, %rax
|
||||
; CHECK-NEXT: emms
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
; This is the load the directives above expect as psrld's memory operand.
%v = load x86_mmx* %q
|
||||
%u = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %t, x86_mmx %v)
|
||||
; The MMX result is returned as an i64 (the movd to %rax matched above).
%s = bitcast x86_mmx %u to i64
|
||||
call void @llvm.x86.mmx.emms()
|
||||
ret i64 %s
|
||||
}
|
||||
declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx)
|
||||
|
||||
; tt8: load-folding test for the llvm.x86.mmx.psrl.q intrinsic.
; The value loaded from %q is the second operand of the intrinsic; the
; directives below expect psrlq to take it as a memory operand.
; NOTE(review): the address register is matched with REG3, which is captured
; in tt0 rather than here -- confirm the FileCheck run keeps variables alive
; across labels.
define i64 @tt8(x86_mmx %t, x86_mmx* %q) {
|
||||
; CHECK-LABEL: tt8:
|
||||
; CHECK: # BB#0:{{.*}} %entry
|
||||
; CHECK: psrlq (%[[REG3]]), %mm0
|
||||
; CHECK-NEXT: movd %mm0, %rax
|
||||
; CHECK-NEXT: emms
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
; This is the load the directives above expect as psrlq's memory operand.
%v = load x86_mmx* %q
|
||||
%u = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %t, x86_mmx %v)
|
||||
; The MMX result is returned as an i64 (the movd to %rax matched above).
%s = bitcast x86_mmx %u to i64
|
||||
call void @llvm.x86.mmx.emms()
|
||||
ret i64 %s
|
||||
}
|
||||
declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx)
|
||||
|
|
|
@@ -49,8 +49,7 @@ entry:
|
|||
define i32 @test2(i32* nocapture readonly %ptr) {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: ## BB#0: ## %entry
|
||||
-; CHECK-NEXT: movq (%rdi), %mm0
|
||||
-; CHECK-NEXT: pshufw $232, %mm0, %mm0
|
||||
+; CHECK-NEXT: pshufw $232, (%rdi), %mm0
|
||||
; CHECK-NEXT: movd %mm0, %eax
|
||||
; CHECK-NEXT: emms
|
||||
; CHECK-NEXT: retq
|
||||
|
|
Loading…
Reference in New Issue