forked from OSchip/llvm-project
[AArch64] Fix lowering for fshl/fshr with SVE types.
These operations don't exist natively, so just let the target-independent code expand them to plain shifts. The generated sequences could probably be optimized a bit more, but they seem good enough for now.

Differential Revision: https://reviews.llvm.org/D101574
This commit is contained in:
parent
7308862ff5
commit
6e6ae6c727
|
@ -1156,6 +1156,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
|||
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
|
||||
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
|
||||
setOperationAction(ISD::SELECT_CC, VT, Expand);
|
||||
setOperationAction(ISD::ROTL, VT, Expand);
|
||||
setOperationAction(ISD::ROTR, VT, Expand);
|
||||
}
|
||||
|
||||
// Illegal unpacked integer vector types.
|
||||
|
|
|
@ -1075,3 +1075,134 @@ define <vscale x 64 x i1> @cmp_split_64(<vscale x 64 x i8> %a, <vscale x 64 x i8
|
|||
%cmp = icmp sgt <vscale x 64 x i8> %a, %b
|
||||
ret <vscale x 64 x i1> %cmp
|
||||
}
|
||||
|
||||
; Funnel shifts
;
; Declarations of the funnel-shift intrinsics exercised by the tests below:
; fshl on nxv2i64 and nxv4i64, and fshr on nxv2i64 only. Each intrinsic takes
; three scalable-vector operands of the same type and returns that type.
|
||||
declare <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
|
||||
declare <vscale x 4 x i64> @llvm.fshl.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i64>)
|
||||
declare <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
|
||||
; fshl on nxv2i64 with three distinct operands (%a, %b, %c), where %c is a
; variable per-lane shift amount.
; The expected assembly has no single funnel-shift instruction; it is the
; shift/or expansion:
;   (a << (c & 63)) | ((b >> 1) >> (~c & 63))
; where ~c is formed by an eor with an all-ones vector, and the variable
; shifts are the predicated forms governed by an all-true p0.d.
define <vscale x 2 x i64> @fshl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c){
|
||||
; CHECK-LABEL: fshl_i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z3.d, #-1 // =0xffffffffffffffff
|
||||
; CHECK-NEXT: eor z3.d, z2.d, z3.d
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: lsr z1.d, z1.d, #1
|
||||
; CHECK-NEXT: and z2.d, z2.d, #0x3f
|
||||
; CHECK-NEXT: and z3.d, z3.d, #0x3f
|
||||
; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z2.d
|
||||
; CHECK-NEXT: lsr z1.d, p0/m, z1.d, z3.d
|
||||
; CHECK-NEXT: orr z0.d, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%fshl = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
|
||||
ret <vscale x 2 x i64> %fshl
|
||||
}
|
||||
|
||||
; fshl on nxv4i64, which the test name marks as an illegal type.
; In the expected assembly each operand occupies a register pair
; (%a = z0/z1, %b = z2/z3, %c = z4/z5) and the same expansion as the
; nxv2i64 case is applied to each half independently:
;   (a << (c & 63)) | ((b >> 1) >> (~c & 63))
; The all-ones constant (z6) and the all-true predicate (p0.d) are shared
; between the two halves.
define <vscale x 4 x i64> @fshl_illegal_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c){
|
||||
; CHECK-LABEL: fshl_illegal_i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z6.d, #-1 // =0xffffffffffffffff
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: eor z7.d, z5.d, z6.d
|
||||
; CHECK-NEXT: and z5.d, z5.d, #0x3f
|
||||
; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z5.d
|
||||
; CHECK-NEXT: eor z5.d, z4.d, z6.d
|
||||
; CHECK-NEXT: lsr z2.d, z2.d, #1
|
||||
; CHECK-NEXT: lsr z3.d, z3.d, #1
|
||||
; CHECK-NEXT: and z4.d, z4.d, #0x3f
|
||||
; CHECK-NEXT: and z5.d, z5.d, #0x3f
|
||||
; CHECK-NEXT: and z7.d, z7.d, #0x3f
|
||||
; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z4.d
|
||||
; CHECK-NEXT: lsr z2.d, p0/m, z2.d, z5.d
|
||||
; CHECK-NEXT: lsr z3.d, p0/m, z3.d, z7.d
|
||||
; CHECK-NEXT: orr z0.d, z0.d, z2.d
|
||||
; CHECK-NEXT: orr z1.d, z1.d, z3.d
|
||||
; CHECK-NEXT: ret
|
||||
%fshl = call <vscale x 4 x i64> @llvm.fshl.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c)
|
||||
ret <vscale x 4 x i64> %fshl
|
||||
}
|
||||
|
||||
; fshl on nxv2i64 with %a passed as both data operands, i.e. a rotate left of
; %a by the variable per-lane amount %b.
; The expected assembly is the rotate expansion:
;   (a << (b & 63)) | (a >> (-b & 63))
; -b is produced by subr with immediate 0, and the left shift uses lslr so
; that the masked amount held in z2 can serve as the destination register.
define <vscale x 2 x i64> @fshl_rot_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){
|
||||
; CHECK-LABEL: fshl_rot_i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z2.d, z1.d
|
||||
; CHECK-NEXT: subr z1.d, z1.d, #0 // =0x0
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: and z2.d, z2.d, #0x3f
|
||||
; CHECK-NEXT: and z1.d, z1.d, #0x3f
|
||||
; CHECK-NEXT: lslr z2.d, p0/m, z2.d, z0.d
|
||||
; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: orr z0.d, z2.d, z0.d
|
||||
; CHECK-NEXT: ret
|
||||
%fshl = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
|
||||
ret <vscale x 2 x i64> %fshl
|
||||
}
|
||||
|
||||
|
||||
; Rotate-left form of fshl (both data operands are %a) on nxv4i64, which the
; test name marks as an illegal type.
; In the expected assembly %a occupies z0/z1 and %b occupies z2/z3, and the
; rotate expansion is emitted once per half:
;   (a << (b & 63)) | (a >> (-b & 63))
; A single all-true predicate (p0.d) governs the shifts of both halves.
define <vscale x 4 x i64> @fshl_rot_illegal_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b){
|
||||
; CHECK-LABEL: fshl_rot_illegal_i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z4.d, z2.d
|
||||
; CHECK-NEXT: subr z2.d, z2.d, #0 // =0x0
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: and z4.d, z4.d, #0x3f
|
||||
; CHECK-NEXT: and z2.d, z2.d, #0x3f
|
||||
; CHECK-NEXT: lslr z4.d, p0/m, z4.d, z0.d
|
||||
; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z3.d
|
||||
; CHECK-NEXT: subr z3.d, z3.d, #0 // =0x0
|
||||
; CHECK-NEXT: and z2.d, z2.d, #0x3f
|
||||
; CHECK-NEXT: and z3.d, z3.d, #0x3f
|
||||
; CHECK-NEXT: lslr z2.d, p0/m, z2.d, z1.d
|
||||
; CHECK-NEXT: lsr z1.d, p0/m, z1.d, z3.d
|
||||
; CHECK-NEXT: orr z0.d, z4.d, z0.d
|
||||
; CHECK-NEXT: orr z1.d, z2.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%fshl = call <vscale x 4 x i64> @llvm.fshl.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b)
|
||||
ret <vscale x 4 x i64> %fshl
|
||||
}
|
||||
|
||||
; Rotate-left form of fshl (both data operands are %a) on nxv2i64 with a
; constant amount: the shift operand is a splat of i64 3, built with the
; insertelement + shufflevector idiom.
; Because the amount is known, the expected assembly needs no masking and no
; predicate, only immediate shifts:
;   (a >> 61) | (a << 3)
define <vscale x 2 x i64> @fshl_rot_const_i64(<vscale x 2 x i64> %a){
|
||||
; CHECK-LABEL: fshl_rot_const_i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: lsr z1.d, z0.d, #61
|
||||
; CHECK-NEXT: lsl z0.d, z0.d, #3
|
||||
; CHECK-NEXT: orr z0.d, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%insert = insertelement <vscale x 2 x i64> poison, i64 3, i32 0
|
||||
%shuf = shufflevector <vscale x 2 x i64> %insert, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
|
||||
%fshl = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> %shuf)
|
||||
ret <vscale x 2 x i64> %fshl
|
||||
}
|
||||
|
||||
; fshr on nxv2i64 with three distinct operands (%a, %b, %c), where %c is a
; variable per-lane shift amount.
; The expected assembly mirrors the fshl expansion with the shift directions
; swapped:
;   ((a << 1) << (~c & 63)) | (b >> (c & 63))
; where ~c is formed by an eor with an all-ones vector, and the variable
; shifts are the predicated forms governed by an all-true p0.d.
define <vscale x 2 x i64> @fshr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c){
|
||||
; CHECK-LABEL: fshr_i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z3.d, #-1 // =0xffffffffffffffff
|
||||
; CHECK-NEXT: eor z3.d, z2.d, z3.d
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: lsl z0.d, z0.d, #1
|
||||
; CHECK-NEXT: and z2.d, z2.d, #0x3f
|
||||
; CHECK-NEXT: and z3.d, z3.d, #0x3f
|
||||
; CHECK-NEXT: lsr z1.d, p0/m, z1.d, z2.d
|
||||
; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z3.d
|
||||
; CHECK-NEXT: orr z0.d, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%fshr = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
|
||||
ret <vscale x 2 x i64> %fshr
|
||||
}
|
||||
|
||||
; fshr on nxv2i64 with %a passed as both data operands, i.e. a rotate right of
; %a by the variable per-lane amount %b.
; The expected assembly is the rotate expansion:
;   (a >> (b & 63)) | (a << (-b & 63))
; -b is produced by subr with immediate 0, and the right shift uses lsrr so
; that the masked amount held in z2 can serve as the destination register.
define <vscale x 2 x i64> @fshr_rot_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){
|
||||
; CHECK-LABEL: fshr_rot_i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z2.d, z1.d
|
||||
; CHECK-NEXT: subr z1.d, z1.d, #0 // =0x0
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: and z2.d, z2.d, #0x3f
|
||||
; CHECK-NEXT: and z1.d, z1.d, #0x3f
|
||||
; CHECK-NEXT: lsrr z2.d, p0/m, z2.d, z0.d
|
||||
; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: orr z0.d, z2.d, z0.d
|
||||
; CHECK-NEXT: ret
|
||||
%fshr = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
|
||||
ret <vscale x 2 x i64> %fshr
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue