forked from OSchip/llvm-project
[AVX-512] Add broadcast VPTERNLOG instructions to special case commuting switch.
The instructions are marked commutable, but without special handling the immediate is not adjusted correctly when the operands are commuted. While here, also remove the masked memory forms that aren't commutable. llvm-svn: 295602
This commit is contained in:
parent
94de4b9330
commit
218d1a020e
|
@ -5292,18 +5292,30 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
|
|||
case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi:
|
||||
case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi:
|
||||
case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi:
|
||||
case X86::VPTERNLOGDZrrik: case X86::VPTERNLOGDZrmik:
|
||||
case X86::VPTERNLOGDZ128rrik: case X86::VPTERNLOGDZ128rmik:
|
||||
case X86::VPTERNLOGDZ256rrik: case X86::VPTERNLOGDZ256rmik:
|
||||
case X86::VPTERNLOGQZrrik: case X86::VPTERNLOGQZrmik:
|
||||
case X86::VPTERNLOGQZ128rrik: case X86::VPTERNLOGQZ128rmik:
|
||||
case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGQZ256rmik:
|
||||
case X86::VPTERNLOGDZrrik:
|
||||
case X86::VPTERNLOGDZ128rrik:
|
||||
case X86::VPTERNLOGDZ256rrik:
|
||||
case X86::VPTERNLOGQZrrik:
|
||||
case X86::VPTERNLOGQZ128rrik:
|
||||
case X86::VPTERNLOGQZ256rrik:
|
||||
case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz:
|
||||
case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
|
||||
case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
|
||||
case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz:
|
||||
case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
|
||||
case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz: {
|
||||
case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
|
||||
case X86::VPTERNLOGDZ128rmbi:
|
||||
case X86::VPTERNLOGDZ256rmbi:
|
||||
case X86::VPTERNLOGDZrmbi:
|
||||
case X86::VPTERNLOGQZ128rmbi:
|
||||
case X86::VPTERNLOGQZ256rmbi:
|
||||
case X86::VPTERNLOGQZrmbi:
|
||||
case X86::VPTERNLOGDZ128rmbikz:
|
||||
case X86::VPTERNLOGDZ256rmbikz:
|
||||
case X86::VPTERNLOGDZrmbikz:
|
||||
case X86::VPTERNLOGQZ128rmbikz:
|
||||
case X86::VPTERNLOGQZ256rmbikz:
|
||||
case X86::VPTERNLOGQZrmbikz: {
|
||||
auto &WorkingMI = cloneIfNew(MI);
|
||||
if (!commuteVPTERNLOG(WorkingMI, OpIdx1, OpIdx2))
|
||||
return nullptr;
|
||||
|
@ -5484,18 +5496,30 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
|
|||
case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi:
|
||||
case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi:
|
||||
case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi:
|
||||
case X86::VPTERNLOGDZrrik: case X86::VPTERNLOGDZrmik:
|
||||
case X86::VPTERNLOGDZ128rrik: case X86::VPTERNLOGDZ128rmik:
|
||||
case X86::VPTERNLOGDZ256rrik: case X86::VPTERNLOGDZ256rmik:
|
||||
case X86::VPTERNLOGQZrrik: case X86::VPTERNLOGQZrmik:
|
||||
case X86::VPTERNLOGQZ128rrik: case X86::VPTERNLOGQZ128rmik:
|
||||
case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGQZ256rmik:
|
||||
case X86::VPTERNLOGDZrrik:
|
||||
case X86::VPTERNLOGDZ128rrik:
|
||||
case X86::VPTERNLOGDZ256rrik:
|
||||
case X86::VPTERNLOGQZrrik:
|
||||
case X86::VPTERNLOGQZ128rrik:
|
||||
case X86::VPTERNLOGQZ256rrik:
|
||||
case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz:
|
||||
case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
|
||||
case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
|
||||
case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz:
|
||||
case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
|
||||
case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
|
||||
case X86::VPTERNLOGDZ128rmbi:
|
||||
case X86::VPTERNLOGDZ256rmbi:
|
||||
case X86::VPTERNLOGDZrmbi:
|
||||
case X86::VPTERNLOGQZ128rmbi:
|
||||
case X86::VPTERNLOGQZ256rmbi:
|
||||
case X86::VPTERNLOGQZrmbi:
|
||||
case X86::VPTERNLOGDZ128rmbikz:
|
||||
case X86::VPTERNLOGDZ256rmbikz:
|
||||
case X86::VPTERNLOGDZrmbikz:
|
||||
case X86::VPTERNLOGQZ128rmbikz:
|
||||
case X86::VPTERNLOGQZ256rmbikz:
|
||||
case X86::VPTERNLOGQZrmbikz:
|
||||
return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
|
||||
default:
|
||||
const X86InstrFMA3Group *FMA3Group =
|
||||
|
|
|
@ -559,7 +559,7 @@ define <16 x i32> @vpternlog_v16i32_102_broadcast1(<16 x i32> %x0, i32* %ptr_x1,
|
|||
define <16 x i32> @vpternlog_v16i32_102_broadcast2(<16 x i32> %x0, <16 x i32> %x1, i32* %ptr_x2) {
|
||||
; CHECK-LABEL: vpternlog_v16i32_102_broadcast2:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpternlogd $33, (%rdi){1to16}, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vpternlogd $9, (%rdi){1to16}, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%x2_scalar = load i32, i32* %ptr_x2
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x2_scalar, i32 0
|
||||
|
@ -571,7 +571,7 @@ define <16 x i32> @vpternlog_v16i32_102_broadcast2(<16 x i32> %x0, <16 x i32> %x
|
|||
define <16 x i32> @vpternlog_v16i32_210_broadcast0(i32* %ptr_x0, <16 x i32> %x1, <16 x i32> %x2) {
|
||||
; CHECK-LABEL: vpternlog_v16i32_210_broadcast0:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpternlogd $33, (%rdi){1to16}, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vpternlogd $9, (%rdi){1to16}, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%x0_scalar = load i32, i32* %ptr_x0
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x0_scalar, i32 0
|
||||
|
@ -851,7 +851,7 @@ define <16 x i32> @vpternlog_v16i32_102_broadcast2_maskz(<16 x i32> %x0, <16 x i
|
|||
; CHECK-LABEL: vpternlog_v16i32_102_broadcast2_maskz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpternlogd $33, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vpternlogd $9, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%x2scalar = load i32, i32* %x2ptr
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
|
||||
|
@ -864,7 +864,7 @@ define <16 x i32> @vpternlog_v16i32_210_broadcast0_maskz(i32* %x0ptr, <16 x i32>
|
|||
; CHECK-LABEL: vpternlog_v16i32_210_broadcast0_maskz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpternlogd $33, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vpternlogd $9, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%x0scalar = load i32, i32* %x0ptr
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
|
||||
|
|
Loading…
Reference in New Issue