forked from OSchip/llvm-project
[SelectionDAG] Don't scalarize vector fpround sources that don't need it.
Similar to the workaround code in ScalarizeVecRes_UnaryOp, ScalarizeVecRes_SETCC, ScalarizeVecRes_VSELECT, etc.

If we have a case like this:

```
define <1 x half> @func(<1 x float> %x) {
  %tmp = fptrunc <1 x float> %x to <1 x half>
  ret <1 x half> %tmp
}
```

On AArch64, the <1 x float> is legal, so this will crash if we call GetScalarizedVector on it.

Differential Revision: https://reviews.llvm.org/D98208
This commit is contained in:
parent
c460ef61d6
commit
f7d73a6b9e
|
@ -318,10 +318,21 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
|
|||
}
|
||||
|
||||
// Scalarizes the result of a vector FP_ROUND node N: builds a scalar
// ISD::FP_ROUND whose result type is the element type of N's vector result
// type, forwarding N's operand 1 unchanged.
//
// NOTE(review): this listing is a rendered diff whose +/- markers were lost.
// Going by the hunk header (-318,10 +318,21), the statements up to and
// including the first `return` appear to be the removed pre-change body, and
// everything from `SDLoc DL(N);` onward the added replacement -- confirm
// against the real file before reading this as a single function body.
SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
|
||||
EVT NewVT = N->getValueType(0).getVectorElementType();
|
||||
// Unconditionally asks for the scalarized form of the source operand.
SDValue Op = GetScalarizedVector(N->getOperand(0));
|
||||
return DAG.getNode(ISD::FP_ROUND, SDLoc(N),
|
||||
NewVT, Op, N->getOperand(1));
|
||||
SDLoc DL(N);
|
||||
SDValue Op = N->getOperand(0);
|
||||
EVT OpVT = Op.getValueType();
|
||||
// The result needs scalarizing, but it's not a given that the source does.
|
||||
// See similar logic in ScalarizeVecRes_UnaryOp.
|
||||
if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
|
||||
// The source type is itself being scalarized, so use its scalarized value.
Op = GetScalarizedVector(Op);
|
||||
} else {
|
||||
// The source type is not being scalarized, so obtain the scalar operand by
// extracting element 0 of the source vector instead.
EVT VT = OpVT.getVectorElementType();
|
||||
Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
|
||||
DAG.getVectorIdxConstant(0, DL));
|
||||
}
|
||||
// Emit the scalar FP_ROUND on the scalar operand chosen above.
return DAG.getNode(ISD::FP_ROUND, DL,
|
||||
N->getValueType(0).getVectorElementType(), Op,
|
||||
N->getOperand(1));
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
|
||||
|
|
|
@ -199,6 +199,33 @@ define <2 x float> @test_vcvt_f32_f64(<2 x double> %v) nounwind readnone ssp {
|
|||
ret <2 x float> %vcvt1.i
|
||||
}
|
||||
|
||||
; Truncates a <1 x float> argument to <1 x half> with fptrunc, then extracts
; element 0 and returns it as a scalar half. Each of the three check prefixes
; below expects the conversion to be emitted as a single `fcvt h0, s0`.
define half @test_vcvt_f16_f32(<1 x float> %x) {
|
||||
; GENERIC-LABEL: test_vcvt_f16_f32:
|
||||
; GENERIC: // %bb.0:
|
||||
; GENERIC-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; GENERIC-NEXT: fcvt h0, s0
|
||||
; GENERIC-NEXT: ret
|
||||
;
|
||||
; FAST-LABEL: test_vcvt_f16_f32:
|
||||
; FAST: // %bb.0:
|
||||
; FAST-NEXT: mov.16b v1, v0
|
||||
; FAST-NEXT: // implicit-def: $q0
|
||||
; FAST-NEXT: mov.16b v0, v1
|
||||
; FAST-NEXT: // kill: def $s0 killed $s0 killed $q0
|
||||
; FAST-NEXT: fcvt h0, s0
|
||||
; FAST-NEXT: ret
|
||||
;
|
||||
; GISEL-LABEL: test_vcvt_f16_f32:
|
||||
; GISEL: // %bb.0:
|
||||
; GISEL-NEXT: fmov x8, d0
|
||||
; GISEL-NEXT: fmov s0, w8
|
||||
; GISEL-NEXT: fcvt h0, s0
|
||||
; GISEL-NEXT: ret
|
||||
%tmp = fptrunc <1 x float> %x to <1 x half>
|
||||
%elt = extractelement <1 x half> %tmp, i32 0
|
||||
ret half %elt
|
||||
}
|
||||
|
||||
; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_high_f32_f64)
|
||||
; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_high_f32_f64)
|
||||
define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {
|
||||
|
|
Loading…
Reference in New Issue