[TargetLowering] Only demand a funnelshift's modulo amount bits

ISD::FSHL/FSHR shift amounts are always interpreted modulo the bitwidth, so for power-of-2 bitwidths only the lowest log2(BitWidth) bits of the shift amount operand need to be demanded.
This commit is contained in:
Simon Pilgrim 2020-03-16 13:46:28 +00:00
parent 85462aefb5
commit 2b3b453a82
3 changed files with 87 additions and 82 deletions

View File

@ -1634,6 +1634,14 @@ bool TargetLowering::SimplifyDemandedBits(
Known.One |= Known2.One;
Known.Zero |= Known2.Zero;
}
// For power-of-2 bitwidths the shift amount is taken modulo BitWidth, so we
// only demand its low log2(BitWidth) bits (mask BitWidth - 1).
if (isPowerOf2_32(BitWidth)) {
APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
}
break;
}
case ISD::ROTL:

View File

@ -16,13 +16,13 @@ define amdgpu_kernel void @fshl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_and_b32 s2, s2, 31
; SI-NEXT: s_sub_i32 s3, 32, s2
; SI-NEXT: v_mov_b32_e32 v0, s1
; SI-NEXT: s_sub_i32 s1, 32, s2
; SI-NEXT: v_mov_b32_e32 v1, s1
; SI-NEXT: v_mov_b32_e32 v1, s3
; SI-NEXT: s_and_b32 s1, s2, 31
; SI-NEXT: v_alignbit_b32 v0, s0, v0, v1
; SI-NEXT: v_mov_b32_e32 v1, s0
; SI-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
; SI-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
@ -32,13 +32,13 @@ define amdgpu_kernel void @fshl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_and_b32 s2, s2, 31
; VI-NEXT: s_sub_i32 s3, 32, s2
; VI-NEXT: v_mov_b32_e32 v0, s1
; VI-NEXT: s_sub_i32 s1, 32, s2
; VI-NEXT: v_mov_b32_e32 v2, s1
; VI-NEXT: s_and_b32 s1, s2, 31
; VI-NEXT: v_mov_b32_e32 v2, s3
; VI-NEXT: v_mov_b32_e32 v1, s0
; VI-NEXT: v_alignbit_b32 v0, s0, v0, v2
; VI-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
; VI-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
; VI-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: v_mov_b32_e32 v1, s5
@ -50,13 +50,13 @@ define amdgpu_kernel void @fshl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_and_b32 s2, s2, 31
; GFX9-NEXT: s_sub_i32 s3, 32, s2
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: s_sub_i32 s1, 32, s2
; GFX9-NEXT: v_mov_b32_e32 v2, s1
; GFX9-NEXT: s_and_b32 s1, s2, 31
; GFX9-NEXT: v_mov_b32_e32 v2, s3
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v2
; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
@ -70,12 +70,12 @@ define amdgpu_kernel void @fshl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
; R600-NEXT: AND_INT * T0.W, KC0[3].X, literal.x,
; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; R600-NEXT: SUB_INT * T1.W, literal.x, PV.W,
; R600-NEXT: SUB_INT * T0.W, literal.x, KC0[3].X,
; R600-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; R600-NEXT: BIT_ALIGN_INT * T1.W, KC0[2].Z, KC0[2].W, PV.W,
; R600-NEXT: CNDE_INT T0.X, T0.W, KC0[2].Z, PV.W,
; R600-NEXT: BIT_ALIGN_INT T0.W, KC0[2].Z, KC0[2].W, PV.W,
; R600-NEXT: AND_INT * T1.W, KC0[3].X, literal.x,
; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; R600-NEXT: CNDE_INT T0.X, PS, KC0[2].Z, PV.W,
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
@ -149,15 +149,15 @@ define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x,
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s9
; SI-NEXT: s_and_b32 s1, s1, 31
; SI-NEXT: s_sub_i32 s10, 32, s1
; SI-NEXT: v_mov_b32_e32 v1, s10
; SI-NEXT: s_and_b32 s0, s0, 31
; SI-NEXT: s_and_b32 s1, s1, 31
; SI-NEXT: v_alignbit_b32 v0, s3, v0, v1
; SI-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
; SI-NEXT: v_mov_b32_e32 v1, s3
; SI-NEXT: s_sub_i32 s1, 32, s0
; SI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
; SI-NEXT: s_and_b32 s0, s0, 31
; SI-NEXT: v_mov_b32_e32 v0, s8
; SI-NEXT: v_mov_b32_e32 v2, s1
; SI-NEXT: v_alignbit_b32 v0, s2, v0, v2
@ -175,15 +175,15 @@ define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x,
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x3c
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s7
; VI-NEXT: s_sub_i32 s8, 32, s1
; VI-NEXT: v_mov_b32_e32 v1, s8
; VI-NEXT: s_and_b32 s1, s1, 31
; VI-NEXT: s_sub_i32 s7, 32, s1
; VI-NEXT: v_mov_b32_e32 v1, s7
; VI-NEXT: s_and_b32 s0, s0, 31
; VI-NEXT: v_alignbit_b32 v0, s5, v0, v1
; VI-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: s_sub_i32 s1, 32, s0
; VI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
; VI-NEXT: s_and_b32 s0, s0, 31
; VI-NEXT: v_mov_b32_e32 v0, s6
; VI-NEXT: v_mov_b32_e32 v2, s1
; VI-NEXT: v_alignbit_b32 v0, s4, v0, v2
@ -203,15 +203,15 @@ define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x,
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x3c
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s7
; GFX9-NEXT: s_sub_i32 s8, 32, s1
; GFX9-NEXT: v_mov_b32_e32 v1, s8
; GFX9-NEXT: s_and_b32 s1, s1, 31
; GFX9-NEXT: s_sub_i32 s7, 32, s1
; GFX9-NEXT: v_mov_b32_e32 v1, s7
; GFX9-NEXT: s_and_b32 s0, s0, 31
; GFX9-NEXT: v_alignbit_b32 v0, s5, v0, v1
; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: s_sub_i32 s1, 32, s0
; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
; GFX9-NEXT: s_and_b32 s0, s0, 31
; GFX9-NEXT: v_mov_b32_e32 v0, s6
; GFX9-NEXT: v_mov_b32_e32 v2, s1
; GFX9-NEXT: v_alignbit_b32 v0, s4, v0, v2
@ -225,25 +225,24 @@ define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x,
;
; R600-LABEL: fshl_v2i32:
; R600: ; %bb.0: ; %entry
; R600-NEXT: ALU 14, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT: ALU 13, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
; R600-NEXT: AND_INT * T0.W, KC0[4].X, literal.x,
; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; R600-NEXT: SUB_INT T1.W, literal.x, PV.W,
; R600-NEXT: AND_INT * T2.W, KC0[3].W, literal.y,
; R600-NEXT: 32(4.484155e-44), 31(4.344025e-44)
; R600-NEXT: SUB_INT T0.Z, literal.x, PS,
; R600-NEXT: BIT_ALIGN_INT T1.W, KC0[3].X, KC0[3].Z, PV.W,
; R600-NEXT: SETE_INT * T0.W, T0.W, 0.0,
; R600-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; R600-NEXT: CNDE_INT T0.Y, PS, PV.W, KC0[3].X,
; R600-NEXT: AND_INT T0.W, KC0[4].X, literal.x,
; R600-NEXT: SUB_INT * T1.W, literal.y, KC0[4].X,
; R600-NEXT: 31(4.344025e-44), 32(4.484155e-44)
; R600-NEXT: AND_INT T0.Y, KC0[3].W, literal.x,
; R600-NEXT: SUB_INT T0.Z, literal.y, KC0[3].W,
; R600-NEXT: BIT_ALIGN_INT T1.W, KC0[3].X, KC0[3].Z, PS,
; R600-NEXT: SETE_INT * T0.W, PV.W, 0.0,
; R600-NEXT: 31(4.344025e-44), 32(4.484155e-44)
; R600-NEXT: CNDE_INT T1.Y, PS, PV.W, KC0[3].X,
; R600-NEXT: BIT_ALIGN_INT T0.W, KC0[2].W, KC0[3].Y, PV.Z,
; R600-NEXT: SETE_INT * T1.W, T2.W, 0.0,
; R600-NEXT: CNDE_INT T0.X, PS, PV.W, KC0[2].W,
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT: SETE_INT * T1.W, PV.Y, 0.0,
; R600-NEXT: CNDE_INT T1.X, PS, PV.W, KC0[2].W,
; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
%0 = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
@ -327,31 +326,31 @@ define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x,
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s15
; SI-NEXT: s_and_b32 s3, s3, 31
; SI-NEXT: s_sub_i32 s16, 32, s3
; SI-NEXT: v_mov_b32_e32 v1, s16
; SI-NEXT: s_and_b32 s2, s2, 31
; SI-NEXT: s_and_b32 s3, s3, 31
; SI-NEXT: v_alignbit_b32 v0, s11, v0, v1
; SI-NEXT: v_cmp_eq_u32_e64 vcc, s3, 0
; SI-NEXT: v_mov_b32_e32 v1, s11
; SI-NEXT: s_sub_i32 s3, 32, s2
; SI-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc
; SI-NEXT: s_and_b32 s2, s2, 31
; SI-NEXT: v_mov_b32_e32 v0, s14
; SI-NEXT: v_mov_b32_e32 v1, s3
; SI-NEXT: s_and_b32 s1, s1, 31
; SI-NEXT: v_alignbit_b32 v0, s10, v0, v1
; SI-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
; SI-NEXT: v_mov_b32_e32 v1, s10
; SI-NEXT: s_sub_i32 s2, 32, s1
; SI-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
; SI-NEXT: s_and_b32 s1, s1, 31
; SI-NEXT: v_mov_b32_e32 v0, s13
; SI-NEXT: v_mov_b32_e32 v1, s2
; SI-NEXT: s_and_b32 s0, s0, 31
; SI-NEXT: v_alignbit_b32 v0, s9, v0, v1
; SI-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
; SI-NEXT: v_mov_b32_e32 v1, s9
; SI-NEXT: s_sub_i32 s1, 32, s0
; SI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
; SI-NEXT: s_and_b32 s0, s0, 31
; SI-NEXT: v_mov_b32_e32 v0, s12
; SI-NEXT: v_mov_b32_e32 v4, s1
; SI-NEXT: v_alignbit_b32 v0, s8, v0, v4
@ -369,31 +368,31 @@ define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x,
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x54
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s11
; VI-NEXT: s_sub_i32 s14, 32, s3
; VI-NEXT: v_mov_b32_e32 v1, s14
; VI-NEXT: s_and_b32 s3, s3, 31
; VI-NEXT: s_sub_i32 s11, 32, s3
; VI-NEXT: v_mov_b32_e32 v1, s11
; VI-NEXT: s_and_b32 s2, s2, 31
; VI-NEXT: v_alignbit_b32 v0, s7, v0, v1
; VI-NEXT: v_cmp_eq_u32_e64 vcc, s3, 0
; VI-NEXT: v_mov_b32_e32 v1, s7
; VI-NEXT: s_sub_i32 s3, 32, s2
; VI-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc
; VI-NEXT: s_and_b32 s2, s2, 31
; VI-NEXT: v_mov_b32_e32 v0, s10
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: s_and_b32 s1, s1, 31
; VI-NEXT: v_alignbit_b32 v0, s6, v0, v1
; VI-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
; VI-NEXT: v_mov_b32_e32 v1, s6
; VI-NEXT: s_sub_i32 s2, 32, s1
; VI-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
; VI-NEXT: s_and_b32 s1, s1, 31
; VI-NEXT: v_mov_b32_e32 v0, s9
; VI-NEXT: v_mov_b32_e32 v1, s2
; VI-NEXT: s_and_b32 s0, s0, 31
; VI-NEXT: v_alignbit_b32 v0, s5, v0, v1
; VI-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: s_sub_i32 s1, 32, s0
; VI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
; VI-NEXT: s_and_b32 s0, s0, 31
; VI-NEXT: v_mov_b32_e32 v0, s8
; VI-NEXT: v_mov_b32_e32 v4, s1
; VI-NEXT: v_alignbit_b32 v0, s4, v0, v4
@ -413,31 +412,31 @@ define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x,
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x54
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s11
; GFX9-NEXT: s_sub_i32 s14, 32, s3
; GFX9-NEXT: v_mov_b32_e32 v1, s14
; GFX9-NEXT: s_and_b32 s3, s3, 31
; GFX9-NEXT: s_sub_i32 s11, 32, s3
; GFX9-NEXT: v_mov_b32_e32 v1, s11
; GFX9-NEXT: s_and_b32 s2, s2, 31
; GFX9-NEXT: v_alignbit_b32 v0, s7, v0, v1
; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s3, 0
; GFX9-NEXT: v_mov_b32_e32 v1, s7
; GFX9-NEXT: s_sub_i32 s3, 32, s2
; GFX9-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc
; GFX9-NEXT: s_and_b32 s2, s2, 31
; GFX9-NEXT: v_mov_b32_e32 v0, s10
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: s_and_b32 s1, s1, 31
; GFX9-NEXT: v_alignbit_b32 v0, s6, v0, v1
; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
; GFX9-NEXT: v_mov_b32_e32 v1, s6
; GFX9-NEXT: s_sub_i32 s2, 32, s1
; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
; GFX9-NEXT: s_and_b32 s1, s1, 31
; GFX9-NEXT: v_mov_b32_e32 v0, s9
; GFX9-NEXT: v_mov_b32_e32 v1, s2
; GFX9-NEXT: s_and_b32 s0, s0, 31
; GFX9-NEXT: v_alignbit_b32 v0, s5, v0, v1
; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: s_sub_i32 s1, 32, s0
; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
; GFX9-NEXT: s_and_b32 s0, s0, 31
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v4, s1
; GFX9-NEXT: v_alignbit_b32 v0, s4, v0, v4
@ -451,38 +450,36 @@ define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x,
;
; R600-LABEL: fshl_v4i32:
; R600: ; %bb.0: ; %entry
; R600-NEXT: ALU 27, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
; R600-NEXT: ALU 25, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
; R600-NEXT: AND_INT T0.W, KC0[6].X, literal.x,
; R600-NEXT: AND_INT * T1.W, KC0[5].W, literal.x,
; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; R600-NEXT: SUB_INT * T2.W, literal.x, PV.W,
; R600-NEXT: SUB_INT * T0.W, literal.x, KC0[6].X,
; R600-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; R600-NEXT: AND_INT T0.X, KC0[5].Y, literal.x,
; R600-NEXT: BIT_ALIGN_INT T0.Y, KC0[4].X, KC0[5].X, PV.W,
; R600-NEXT: SETE_INT T0.Z, T0.W, 0.0,
; R600-NEXT: SUB_INT * T0.W, literal.y, T1.W,
; R600-NEXT: BIT_ALIGN_INT * T0.W, KC0[4].X, KC0[5].X, PV.W,
; R600-NEXT: AND_INT * T1.W, KC0[6].X, literal.x,
; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; R600-NEXT: AND_INT T0.X, KC0[5].Z, literal.x,
; R600-NEXT: SUB_INT T0.Y, literal.y, KC0[5].Z,
; R600-NEXT: SETE_INT T0.Z, PV.W, 0.0,
; R600-NEXT: SUB_INT T1.W, literal.y, KC0[5].W,
; R600-NEXT: AND_INT * T2.W, KC0[5].W, literal.x,
; R600-NEXT: 31(4.344025e-44), 32(4.484155e-44)
; R600-NEXT: AND_INT * T2.W, KC0[5].Z, literal.x,
; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; R600-NEXT: SUB_INT T1.Y, literal.x, PV.W,
; R600-NEXT: BIT_ALIGN_INT T1.Z, KC0[3].W, KC0[4].W, T0.W,
; R600-NEXT: SETE_INT * T0.W, T1.W, 0.0,
; R600-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; R600-NEXT: CNDE_INT * T1.W, T0.Z, T0.Y, KC0[4].X,
; R600-NEXT: CNDE_INT T1.Z, T0.W, T1.Z, KC0[3].W,
; R600-NEXT: BIT_ALIGN_INT T0.W, KC0[3].Z, KC0[4].Z, T1.Y,
; R600-NEXT: SETE_INT * T2.W, T2.W, 0.0,
; R600-NEXT: CNDE_INT T1.Y, PS, PV.W, KC0[3].Z,
; R600-NEXT: SUB_INT * T0.W, literal.x, T0.X,
; R600-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; R600-NEXT: BIT_ALIGN_INT T0.W, KC0[3].Y, KC0[4].Y, PV.W,
; R600-NEXT: SETE_INT T1.Z, PS, 0.0,
; R600-NEXT: BIT_ALIGN_INT * T1.W, KC0[3].W, KC0[4].W, PV.W,
; R600-NEXT: CNDE_INT * T0.W, T0.Z, T0.W, KC0[4].X,
; R600-NEXT: CNDE_INT T0.Z, T1.Z, T1.W, KC0[3].W,
; R600-NEXT: BIT_ALIGN_INT T1.W, KC0[3].Z, KC0[4].Z, T0.Y,
; R600-NEXT: SETE_INT * T2.W, T0.X, 0.0,
; R600-NEXT: CNDE_INT T1.X, PS, PV.W, KC0[3].Y,
; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; R600-NEXT: CNDE_INT T0.Y, PS, PV.W, KC0[3].Z,
; R600-NEXT: AND_INT T1.W, KC0[5].Y, literal.x,
; R600-NEXT: SUB_INT * T2.W, literal.y, KC0[5].Y,
; R600-NEXT: 31(4.344025e-44), 32(4.484155e-44)
; R600-NEXT: BIT_ALIGN_INT T2.W, KC0[3].Y, KC0[4].Y, PS,
; R600-NEXT: SETE_INT * T1.W, PV.W, 0.0,
; R600-NEXT: CNDE_INT T0.X, PS, PV.W, KC0[3].Y,
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
%0 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z)

View File

@ -88,9 +88,9 @@ declare i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
define i32 @n6_fshl(i32 %x, i32 %y, i8 %shamt) nounwind {
; X86-LABEL: n6_fshl:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: shldl %cl, %edx, %eax
; X86-NEXT: retl
;
@ -108,9 +108,9 @@ define i32 @n6_fshl(i32 %x, i32 %y, i8 %shamt) nounwind {
define i32 @n7_fshr(i32 %x, i32 %y, i8 %shamt) nounwind {
; X86-LABEL: n7_fshr:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: shrdl %cl, %edx, %eax
; X86-NEXT: retl
;