[X86][MMX] Reapply: Add MMX instructions to foldable tables

Reapply r230248.

Teach the peephole optimizer to work with MMX instructions by adding
entries into the foldable tables. This covers folding opportunities not
handled during isel.

llvm-svn: 230499
This commit is contained in:
Bruno Cardoso Lopes 2015-02-25 15:14:02 +00:00
parent 48b10681f9
commit ab7afa9144
3 changed files with 230 additions and 2 deletions

View File

@ -547,6 +547,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::UCOMISDrr, X86::UCOMISDrm, 0 },
{ X86::UCOMISSrr, X86::UCOMISSrm, 0 },
// MMX version of foldable instructions
{ X86::MMX_CVTPD2PIirr, X86::MMX_CVTPD2PIirm, 0 },
{ X86::MMX_CVTPI2PDirr, X86::MMX_CVTPI2PDirm, 0 },
{ X86::MMX_CVTPS2PIirr, X86::MMX_CVTPS2PIirm, 0 },
{ X86::MMX_CVTTPD2PIirr, X86::MMX_CVTTPD2PIirm, 0 },
{ X86::MMX_CVTTPS2PIirr, X86::MMX_CVTTPS2PIirm, 0 },
{ X86::MMX_MOVD64to64rr, X86::MMX_MOVQ64rm, 0 },
{ X86::MMX_PABSBrr64, X86::MMX_PABSBrm64, 0 },
{ X86::MMX_PABSDrr64, X86::MMX_PABSDrm64, 0 },
{ X86::MMX_PABSWrr64, X86::MMX_PABSWrm64, 0 },
{ X86::MMX_PSHUFWri, X86::MMX_PSHUFWmi, 0 },
// AVX 128-bit versions of foldable instructions
{ X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 },
{ X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 },
@ -1117,6 +1129,78 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::XORPDrr, X86::XORPDrm, TB_ALIGN_16 },
{ X86::XORPSrr, X86::XORPSrm, TB_ALIGN_16 },
// MMX version of foldable instructions
{ X86::MMX_CVTPI2PSirr, X86::MMX_CVTPI2PSirm, 0 },
{ X86::MMX_PACKSSDWirr, X86::MMX_PACKSSDWirm, 0 },
{ X86::MMX_PACKSSWBirr, X86::MMX_PACKSSWBirm, 0 },
{ X86::MMX_PACKUSWBirr, X86::MMX_PACKUSWBirm, 0 },
{ X86::MMX_PADDBirr, X86::MMX_PADDBirm, 0 },
{ X86::MMX_PADDDirr, X86::MMX_PADDDirm, 0 },
{ X86::MMX_PADDQirr, X86::MMX_PADDQirm, 0 },
{ X86::MMX_PADDSBirr, X86::MMX_PADDSBirm, 0 },
{ X86::MMX_PADDSWirr, X86::MMX_PADDSWirm, 0 },
{ X86::MMX_PADDUSBirr, X86::MMX_PADDUSBirm, 0 },
{ X86::MMX_PADDUSWirr, X86::MMX_PADDUSWirm, 0 },
{ X86::MMX_PADDWirr, X86::MMX_PADDWirm, 0 },
{ X86::MMX_PALIGNR64irr, X86::MMX_PALIGNR64irm, 0 },
{ X86::MMX_PANDNirr, X86::MMX_PANDNirm, 0 },
{ X86::MMX_PANDirr, X86::MMX_PANDirm, 0 },
{ X86::MMX_PAVGBirr, X86::MMX_PAVGBirm, 0 },
{ X86::MMX_PAVGWirr, X86::MMX_PAVGWirm, 0 },
{ X86::MMX_PCMPEQBirr, X86::MMX_PCMPEQBirm, 0 },
{ X86::MMX_PCMPEQDirr, X86::MMX_PCMPEQDirm, 0 },
{ X86::MMX_PCMPEQWirr, X86::MMX_PCMPEQWirm, 0 },
{ X86::MMX_PCMPGTBirr, X86::MMX_PCMPGTBirm, 0 },
{ X86::MMX_PCMPGTDirr, X86::MMX_PCMPGTDirm, 0 },
{ X86::MMX_PCMPGTWirr, X86::MMX_PCMPGTWirm, 0 },
{ X86::MMX_PHADDSWrr64, X86::MMX_PHADDSWrm64, 0 },
{ X86::MMX_PHADDWrr64, X86::MMX_PHADDWrm64, 0 },
{ X86::MMX_PHADDrr64, X86::MMX_PHADDrm64, 0 },
{ X86::MMX_PHSUBDrr64, X86::MMX_PHSUBDrm64, 0 },
{ X86::MMX_PHSUBSWrr64, X86::MMX_PHSUBSWrm64, 0 },
{ X86::MMX_PHSUBWrr64, X86::MMX_PHSUBWrm64, 0 },
{ X86::MMX_PINSRWirri, X86::MMX_PINSRWirmi, 0 },
{ X86::MMX_PMADDUBSWrr64, X86::MMX_PMADDUBSWrm64, 0 },
{ X86::MMX_PMADDWDirr, X86::MMX_PMADDWDirm, 0 },
{ X86::MMX_PMAXSWirr, X86::MMX_PMAXSWirm, 0 },
{ X86::MMX_PMAXUBirr, X86::MMX_PMAXUBirm, 0 },
{ X86::MMX_PMINSWirr, X86::MMX_PMINSWirm, 0 },
{ X86::MMX_PMINUBirr, X86::MMX_PMINUBirm, 0 },
{ X86::MMX_PMULHRSWrr64, X86::MMX_PMULHRSWrm64, 0 },
{ X86::MMX_PMULHUWirr, X86::MMX_PMULHUWirm, 0 },
{ X86::MMX_PMULHWirr, X86::MMX_PMULHWirm, 0 },
{ X86::MMX_PMULLWirr, X86::MMX_PMULLWirm, 0 },
{ X86::MMX_PMULUDQirr, X86::MMX_PMULUDQirm, 0 },
{ X86::MMX_PORirr, X86::MMX_PORirm, 0 },
{ X86::MMX_PSADBWirr, X86::MMX_PSADBWirm, 0 },
{ X86::MMX_PSHUFBrr64, X86::MMX_PSHUFBrm64, 0 },
{ X86::MMX_PSIGNBrr64, X86::MMX_PSIGNBrm64, 0 },
{ X86::MMX_PSIGNDrr64, X86::MMX_PSIGNDrm64, 0 },
{ X86::MMX_PSIGNWrr64, X86::MMX_PSIGNWrm64, 0 },
{ X86::MMX_PSLLDrr, X86::MMX_PSLLDrm, 0 },
{ X86::MMX_PSLLQrr, X86::MMX_PSLLQrm, 0 },
{ X86::MMX_PSLLWrr, X86::MMX_PSLLWrm, 0 },
{ X86::MMX_PSRADrr, X86::MMX_PSRADrm, 0 },
{ X86::MMX_PSRAWrr, X86::MMX_PSRAWrm, 0 },
{ X86::MMX_PSRLDrr, X86::MMX_PSRLDrm, 0 },
{ X86::MMX_PSRLQrr, X86::MMX_PSRLQrm, 0 },
{ X86::MMX_PSRLWrr, X86::MMX_PSRLWrm, 0 },
{ X86::MMX_PSUBBirr, X86::MMX_PSUBBirm, 0 },
{ X86::MMX_PSUBDirr, X86::MMX_PSUBDirm, 0 },
{ X86::MMX_PSUBQirr, X86::MMX_PSUBQirm, 0 },
{ X86::MMX_PSUBSBirr, X86::MMX_PSUBSBirm, 0 },
{ X86::MMX_PSUBSWirr, X86::MMX_PSUBSWirm, 0 },
{ X86::MMX_PSUBUSBirr, X86::MMX_PSUBUSBirm, 0 },
{ X86::MMX_PSUBUSWirr, X86::MMX_PSUBUSWirm, 0 },
{ X86::MMX_PSUBWirr, X86::MMX_PSUBWirm, 0 },
{ X86::MMX_PUNPCKHBWirr, X86::MMX_PUNPCKHBWirm, 0 },
{ X86::MMX_PUNPCKHDQirr, X86::MMX_PUNPCKHDQirm, 0 },
{ X86::MMX_PUNPCKHWDirr, X86::MMX_PUNPCKHWDirm, 0 },
{ X86::MMX_PUNPCKLBWirr, X86::MMX_PUNPCKLBWirm, 0 },
{ X86::MMX_PUNPCKLDQirr, X86::MMX_PUNPCKLDQirm, 0 },
{ X86::MMX_PUNPCKLWDirr, X86::MMX_PUNPCKLWDirm, 0 },
{ X86::MMX_PXORirr, X86::MMX_PXORirm, 0 },
// AVX 128-bit versions of foldable instructions
{ X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 },
{ X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, 0 },

View File

@ -135,3 +135,148 @@ entry:
ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32)
; tt0: the x86_mmx value loaded from %q is the second operand of
; @llvm.x86.mmx.padd.b. The expected assembly has paddb taking that operand
; directly from memory, with no separate load instruction matched before it.
; The paddb pattern also binds the FileCheck variable REG3 to the address
; register.
define i64 @tt0(x86_mmx %t, x86_mmx* %q) {
; CHECK-LABEL: tt0:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: paddb (%[[REG3:[a-z]+]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: emms
; CHECK-NEXT: retq
entry:
%v = load x86_mmx* %q
%u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %v)
; The MMX result is returned as an i64 (the movd to %rax expected above).
%s = bitcast x86_mmx %u to i64
call void @llvm.x86.mmx.emms()
ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
declare void @llvm.x86.mmx.emms()
; tt1: same shape as tt0, for @llvm.x86.mmx.padd.w. The expected assembly has
; paddw taking the value loaded from %q directly as a memory operand.
; REG3 is the FileCheck variable bound by the paddb pattern in tt0.
define i64 @tt1(x86_mmx %t, x86_mmx* %q) {
; CHECK-LABEL: tt1:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: paddw (%[[REG3]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: emms
; CHECK-NEXT: retq
entry:
%v = load x86_mmx* %q
%u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %v)
; The MMX result is returned as an i64 (the movd to %rax expected above).
%s = bitcast x86_mmx %u to i64
call void @llvm.x86.mmx.emms()
ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
; tt2: same shape as tt0, for @llvm.x86.mmx.padd.d. The expected assembly has
; paddd taking the value loaded from %q directly as a memory operand.
; REG3 is the FileCheck variable bound by the paddb pattern in tt0.
define i64 @tt2(x86_mmx %t, x86_mmx* %q) {
; CHECK-LABEL: tt2:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: paddd (%[[REG3]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: emms
; CHECK-NEXT: retq
entry:
%v = load x86_mmx* %q
%u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %v)
; The MMX result is returned as an i64 (the movd to %rax expected above).
%s = bitcast x86_mmx %u to i64
call void @llvm.x86.mmx.emms()
ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
; tt3: same shape as tt0, for @llvm.x86.mmx.padd.q. The expected assembly has
; paddq taking the value loaded from %q directly as a memory operand.
; REG3 is the FileCheck variable bound by the paddb pattern in tt0.
define i64 @tt3(x86_mmx %t, x86_mmx* %q) {
; CHECK-LABEL: tt3:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: paddq (%[[REG3]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: emms
; CHECK-NEXT: retq
entry:
%v = load x86_mmx* %q
%u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %v)
; The MMX result is returned as an i64 (the movd to %rax expected above).
%s = bitcast x86_mmx %u to i64
call void @llvm.x86.mmx.emms()
ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
; tt4: same shape as tt0, for @llvm.x86.mmx.paddus.b. The expected assembly
; has paddusb taking the value loaded from %q directly as a memory operand.
; REG3 is the FileCheck variable bound by the paddb pattern in tt0.
define i64 @tt4(x86_mmx %t, x86_mmx* %q) {
; CHECK-LABEL: tt4:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: paddusb (%[[REG3]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: emms
; CHECK-NEXT: retq
entry:
%v = load x86_mmx* %q
%u = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %t, x86_mmx %v)
; The MMX result is returned as an i64 (the movd to %rax expected above).
%s = bitcast x86_mmx %u to i64
call void @llvm.x86.mmx.emms()
ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx)
; tt5: same shape as tt0, for @llvm.x86.mmx.paddus.w. The expected assembly
; has paddusw taking the value loaded from %q directly as a memory operand.
; REG3 is the FileCheck variable bound by the paddb pattern in tt0.
define i64 @tt5(x86_mmx %t, x86_mmx* %q) {
; CHECK-LABEL: tt5:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: paddusw (%[[REG3]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: emms
; CHECK-NEXT: retq
entry:
%v = load x86_mmx* %q
%u = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %t, x86_mmx %v)
; The MMX result is returned as an i64 (the movd to %rax expected above).
%s = bitcast x86_mmx %u to i64
call void @llvm.x86.mmx.emms()
ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
; tt6: same shape as tt0, for the shift intrinsic @llvm.x86.mmx.psrl.w. The
; value loaded from %q is the second (x86_mmx) operand, and the expected
; assembly has psrlw taking it directly as a memory operand.
; REG3 is the FileCheck variable bound by the paddb pattern in tt0.
define i64 @tt6(x86_mmx %t, x86_mmx* %q) {
; CHECK-LABEL: tt6:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: psrlw (%[[REG3]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: emms
; CHECK-NEXT: retq
entry:
%v = load x86_mmx* %q
%u = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %t, x86_mmx %v)
; The MMX result is returned as an i64 (the movd to %rax expected above).
%s = bitcast x86_mmx %u to i64
call void @llvm.x86.mmx.emms()
ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx)
; tt7: same shape as tt0, for the shift intrinsic @llvm.x86.mmx.psrl.d. The
; value loaded from %q is the second (x86_mmx) operand, and the expected
; assembly has psrld taking it directly as a memory operand.
; REG3 is the FileCheck variable bound by the paddb pattern in tt0.
define i64 @tt7(x86_mmx %t, x86_mmx* %q) {
; CHECK-LABEL: tt7:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: psrld (%[[REG3]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: emms
; CHECK-NEXT: retq
entry:
%v = load x86_mmx* %q
%u = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %t, x86_mmx %v)
; The MMX result is returned as an i64 (the movd to %rax expected above).
%s = bitcast x86_mmx %u to i64
call void @llvm.x86.mmx.emms()
ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx)
; tt8: same shape as tt0, for the shift intrinsic @llvm.x86.mmx.psrl.q. The
; value loaded from %q is the second (x86_mmx) operand, and the expected
; assembly has psrlq taking it directly as a memory operand.
; REG3 is the FileCheck variable bound by the paddb pattern in tt0.
define i64 @tt8(x86_mmx %t, x86_mmx* %q) {
; CHECK-LABEL: tt8:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: psrlq (%[[REG3]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: emms
; CHECK-NEXT: retq
entry:
%v = load x86_mmx* %q
%u = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %t, x86_mmx %v)
; The MMX result is returned as an i64 (the movd to %rax expected above).
%s = bitcast x86_mmx %u to i64
call void @llvm.x86.mmx.emms()
ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx)

View File

@ -49,8 +49,7 @@ entry:
define i32 @test2(i32* nocapture readonly %ptr) {
; CHECK-LABEL: test2:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: movq (%rdi), %mm0
; CHECK-NEXT: pshufw $232, %mm0, %mm0
; CHECK-NEXT: pshufw $232, (%rdi), %mm0
; CHECK-NEXT: movd %mm0, %eax
; CHECK-NEXT: emms
; CHECK-NEXT: retq