forked from OSchip/llvm-project
[SLP] add tests for reduction ordering; NFC
This commit is contained in:
parent
50f4c7c785
commit
b011611e37
|
@ -74,3 +74,150 @@ for.end: ; preds = %for.inc
|
|||
|
||||
declare i32 @printf(i8* nocapture, ...)
|
||||
|
||||
; PR41312 - the order of the reduction ops should not prevent forming a reduction.
|
||||
; The 'wrong' member of the reduction requires a greater cost if grouped with the
|
||||
; other candidates in the reduction because it does not have matching predicate
|
||||
; and/or constant operand.
|
||||
|
||||
define float @merge_anyof_v4f32_wrong_first(<4 x float> %x) {
|
||||
; CHECK-LABEL: @merge_anyof_v4f32_wrong_first(
|
||||
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[X]], i32 1
|
||||
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x float> [[X]], i32 2
|
||||
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x float> [[X]], i32 3
|
||||
; CHECK-NEXT: [[CMP3WRONG:%.*]] = fcmp olt float [[X3]], 4.200000e+01
|
||||
; CHECK-NEXT: [[CMP0:%.*]] = fcmp ogt float [[X0]], 1.000000e+00
|
||||
; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[X1]], 1.000000e+00
|
||||
; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[X2]], 1.000000e+00
|
||||
; CHECK-NEXT: [[CMP3:%.*]] = fcmp ogt float [[X3]], 1.000000e+00
|
||||
; CHECK-NEXT: [[OR03:%.*]] = or i1 [[CMP0]], [[CMP3WRONG]]
|
||||
; CHECK-NEXT: [[OR031:%.*]] = or i1 [[OR03]], [[CMP1]]
|
||||
; CHECK-NEXT: [[OR0312:%.*]] = or i1 [[OR031]], [[CMP2]]
|
||||
; CHECK-NEXT: [[OR03123:%.*]] = or i1 [[OR0312]], [[CMP3]]
|
||||
; CHECK-NEXT: [[R:%.*]] = select i1 [[OR03123]], float -1.000000e+00, float 1.000000e+00
|
||||
; CHECK-NEXT: ret float [[R]]
|
||||
;
|
||||
%x0 = extractelement <4 x float> %x, i32 0
|
||||
%x1 = extractelement <4 x float> %x, i32 1
|
||||
%x2 = extractelement <4 x float> %x, i32 2
|
||||
%x3 = extractelement <4 x float> %x, i32 3
|
||||
%cmp3wrong = fcmp olt float %x3, 42.0
|
||||
%cmp0 = fcmp ogt float %x0, 1.0
|
||||
%cmp1 = fcmp ogt float %x1, 1.0
|
||||
%cmp2 = fcmp ogt float %x2, 1.0
|
||||
%cmp3 = fcmp ogt float %x3, 1.0
|
||||
%or03 = or i1 %cmp0, %cmp3wrong
|
||||
%or031 = or i1 %or03, %cmp1
|
||||
%or0312 = or i1 %or031, %cmp2
|
||||
%or03123 = or i1 %or0312, %cmp3
|
||||
%r = select i1 %or03123, float -1.0, float 1.0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @merge_anyof_v4f32_wrong_last(<4 x float> %x) {
|
||||
; CHECK-LABEL: @merge_anyof_v4f32_wrong_last(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
|
||||
; CHECK-NEXT: [[CMP3WRONG:%.*]] = fcmp olt float [[TMP1]], 4.200000e+01
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <4 x float> [[X]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[CMP3WRONG]]
|
||||
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP4]], float -1.000000e+00, float 1.000000e+00
|
||||
; CHECK-NEXT: ret float [[R]]
|
||||
;
|
||||
%x0 = extractelement <4 x float> %x, i32 0
|
||||
%x1 = extractelement <4 x float> %x, i32 1
|
||||
%x2 = extractelement <4 x float> %x, i32 2
|
||||
%x3 = extractelement <4 x float> %x, i32 3
|
||||
%cmp3wrong = fcmp olt float %x3, 42.0
|
||||
%cmp0 = fcmp ogt float %x0, 1.0
|
||||
%cmp1 = fcmp ogt float %x1, 1.0
|
||||
%cmp2 = fcmp ogt float %x2, 1.0
|
||||
%cmp3 = fcmp ogt float %x3, 1.0
|
||||
%or03 = or i1 %cmp0, %cmp3
|
||||
%or031 = or i1 %or03, %cmp1
|
||||
%or0312 = or i1 %or031, %cmp2
|
||||
%or03123 = or i1 %or0312, %cmp3wrong
|
||||
%r = select i1 %or03123, float -1.0, float 1.0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define i32 @merge_anyof_v4i32_wrong_middle(<4 x i32> %x) {
|
||||
; CHECK-LABEL: @merge_anyof_v4i32_wrong_middle(
|
||||
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
|
||||
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
|
||||
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
|
||||
; CHECK-NEXT: [[CMP3WRONG:%.*]] = icmp slt i32 [[X3]], 42
|
||||
; CHECK-NEXT: [[CMP0:%.*]] = icmp sgt i32 [[X0]], 1
|
||||
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X1]], 1
|
||||
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X2]], 1
|
||||
; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[X3]], 1
|
||||
; CHECK-NEXT: [[OR03:%.*]] = or i1 [[CMP0]], [[CMP3]]
|
||||
; CHECK-NEXT: [[OR033:%.*]] = or i1 [[OR03]], [[CMP3WRONG]]
|
||||
; CHECK-NEXT: [[OR0332:%.*]] = or i1 [[OR033]], [[CMP2]]
|
||||
; CHECK-NEXT: [[OR03321:%.*]] = or i1 [[OR0332]], [[CMP1]]
|
||||
; CHECK-NEXT: [[R:%.*]] = select i1 [[OR03321]], i32 -1, i32 1
|
||||
; CHECK-NEXT: ret i32 [[R]]
|
||||
;
|
||||
%x0 = extractelement <4 x i32> %x, i32 0
|
||||
%x1 = extractelement <4 x i32> %x, i32 1
|
||||
%x2 = extractelement <4 x i32> %x, i32 2
|
||||
%x3 = extractelement <4 x i32> %x, i32 3
|
||||
%cmp3wrong = icmp slt i32 %x3, 42
|
||||
%cmp0 = icmp sgt i32 %x0, 1
|
||||
%cmp1 = icmp sgt i32 %x1, 1
|
||||
%cmp2 = icmp sgt i32 %x2, 1
|
||||
%cmp3 = icmp sgt i32 %x3, 1
|
||||
%or03 = or i1 %cmp0, %cmp3
|
||||
%or033 = or i1 %or03, %cmp3wrong
|
||||
%or0332 = or i1 %or033, %cmp2
|
||||
%or03321 = or i1 %or0332, %cmp1
|
||||
%r = select i1 %or03321, i32 -1, i32 1
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
define i32 @merge_anyof_v4i32_wrong_middle_better_rdx(<4 x i32> %x, <4 x i32> %y) {
|
||||
; CHECK-LABEL: @merge_anyof_v4i32_wrong_middle_better_rdx(
|
||||
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
|
||||
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
|
||||
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
|
||||
; CHECK-NEXT: [[Y0:%.*]] = extractelement <4 x i32> [[Y:%.*]], i32 0
|
||||
; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i32> [[Y]], i32 1
|
||||
; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i32> [[Y]], i32 2
|
||||
; CHECK-NEXT: [[Y3:%.*]] = extractelement <4 x i32> [[Y]], i32 3
|
||||
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X1]], [[Y1]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[X0]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[X3]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[Y3]], i32 2
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X2]], i32 3
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> undef, i32 [[Y0]], i32 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[Y3]], i32 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[X3]], i32 2
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[Y2]], i32 3
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i32> [[TMP4]], [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> [[TMP9]])
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP10]], [[CMP1]]
|
||||
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP11]], i32 -1, i32 1
|
||||
; CHECK-NEXT: ret i32 [[R]]
|
||||
;
|
||||
%x0 = extractelement <4 x i32> %x, i32 0
|
||||
%x1 = extractelement <4 x i32> %x, i32 1
|
||||
%x2 = extractelement <4 x i32> %x, i32 2
|
||||
%x3 = extractelement <4 x i32> %x, i32 3
|
||||
%y0 = extractelement <4 x i32> %y, i32 0
|
||||
%y1 = extractelement <4 x i32> %y, i32 1
|
||||
%y2 = extractelement <4 x i32> %y, i32 2
|
||||
%y3 = extractelement <4 x i32> %y, i32 3
|
||||
%cmp3wrong = icmp slt i32 %x3, %y3
|
||||
%cmp0 = icmp sgt i32 %x0, %y0
|
||||
%cmp1 = icmp sgt i32 %x1, %y1
|
||||
%cmp2 = icmp sgt i32 %x2, %y2
|
||||
%cmp3 = icmp sgt i32 %x3, %y3
|
||||
%or03 = or i1 %cmp0, %cmp3
|
||||
%or033 = or i1 %or03, %cmp3wrong
|
||||
%or0332 = or i1 %or033, %cmp2
|
||||
%or03321 = or i1 %or0332, %cmp1
|
||||
%r = select i1 %or03321, i32 -1, i32 1
|
||||
ret i32 %r
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue