From 3f37a4180b85b78ee5cd82ff4d67f63796c54362 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 30 Sep 2016 05:35:42 +0000 Subject: [PATCH] Revert r282835 "[AVX-512] Always use the full 32 register vector classes for addRegisterClass regardless of whether AVX512/VLX is enabled or not." Turns out this doesn't pass verify-machineinstrs. llvm-svn: 282841 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 45 ++++--- llvm/test/CodeGen/X86/sad.ll | 16 +-- .../CodeGen/X86/vector-half-conversions.ll | 118 +++++++++--------- 3 files changed, 98 insertions(+), 81 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b6ab9c3ec20d..a1600230cc9b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -485,8 +485,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) { // f32 and f64 use SSE. // Set up the FP register classes. - addRegisterClass(MVT::f32, &X86::FR32XRegClass); - addRegisterClass(MVT::f64, &X86::FR64XRegClass); + addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass + : &X86::FR32RegClass); + addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass + : &X86::FR64RegClass); for (auto VT : { MVT::f32, MVT::f64 }) { // Use ANDPD to simulate FABS. @@ -515,7 +517,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } else if (UseX87 && X86ScalarSSEf32) { // Use SSE for f32, x87 for f64. // Set up the FP register classes. - addRegisterClass(MVT::f32, &X86::FR32XRegClass); + addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass + : &X86::FR32RegClass); addRegisterClass(MVT::f64, &X86::RFP64RegClass); // Use ANDPS to simulate FABS. @@ -718,7 +721,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) { - addRegisterClass(MVT::v4f32, &X86::VR128XRegClass); + addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass + : &X86::VR128RegClass); setOperationAction(ISD::FNEG, MVT::v4f32, Custom); setOperationAction(ISD::FABS, MVT::v4f32, Custom); @@ -731,14 +735,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) { - addRegisterClass(MVT::v2f64, &X86::VR128XRegClass); + addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass + : &X86::VR128RegClass); // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM // registers cannot be used even for integer operations. - addRegisterClass(MVT::v16i8, &X86::VR128XRegClass); - addRegisterClass(MVT::v8i16, &X86::VR128XRegClass); - addRegisterClass(MVT::v4i32, &X86::VR128XRegClass); - addRegisterClass(MVT::v2i64, &X86::VR128XRegClass); + addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass + : &X86::VR128RegClass); + addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass + : &X86::VR128RegClass); + addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass + : &X86::VR128RegClass); + addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass + : &X86::VR128RegClass); setOperationAction(ISD::MUL, MVT::v16i8, Custom); setOperationAction(ISD::MUL, MVT::v4i32, Custom); @@ -946,12 +955,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) { bool HasInt256 = Subtarget.hasInt256(); - addRegisterClass(MVT::v32i8, &X86::VR256XRegClass); - addRegisterClass(MVT::v16i16, &X86::VR256XRegClass); - addRegisterClass(MVT::v8i32, &X86::VR256XRegClass); - addRegisterClass(MVT::v8f32, &X86::VR256XRegClass); - addRegisterClass(MVT::v4i64, &X86::VR256XRegClass); - addRegisterClass(MVT::v4f64, &X86::VR256XRegClass); + addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass + : &X86::VR256RegClass); + addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass + : &X86::VR256RegClass); + addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass + : &X86::VR256RegClass); + addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass + : &X86::VR256RegClass); + addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass + : &X86::VR256RegClass); + addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass + : &X86::VR256RegClass); for (auto VT : { MVT::v8f32, MVT::v4f64 }) { setOperationAction(ISD::FFLOOR, VT, Legal); diff --git a/llvm/test/CodeGen/X86/sad.ll b/llvm/test/CodeGen/X86/sad.ll index 4c57d3bec8ab..07c07485c88e 100644 --- a/llvm/test/CodeGen/X86/sad.ll +++ b/llvm/test/CodeGen/X86/sad.ll @@ -155,12 +155,12 @@ define i32 @sad_32i8() nounwind { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pxor %xmm13, %xmm13 ; SSE2-NEXT: pxor %xmm15, %xmm15 +; SSE2-NEXT: pxor %xmm5, %xmm5 ; SSE2-NEXT: pxor %xmm14, %xmm14 -; SSE2-NEXT: pxor %xmm6, %xmm6 ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: .LBB1_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 -; SSE2-NEXT: movdqa %xmm6, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) # 16-byte Spill @@ -252,9 +252,11 @@ define i32 @sad_32i8() nounwind { ; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload ; SSE2-NEXT: paddd %xmm3, %xmm4 ; SSE2-NEXT: paddd %xmm6, %xmm0 -; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm6 # 16-byte Reload -; SSE2-NEXT: paddd %xmm7, %xmm6 -; SSE2-NEXT: paddd %xmm5, %xmm14 +; SSE2-NEXT: paddd %xmm7, %xmm14 +; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm3 # 16-byte Reload +; SSE2-NEXT: paddd %xmm5, %xmm3 +; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm5 # 16-byte Reload ; SSE2-NEXT: paddd %xmm8, %xmm1 ; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm3 # 16-byte Reload ; SSE2-NEXT: paddd %xmm2, %xmm3 @@ -264,9 +266,9 @@ define i32 @sad_32i8() nounwind { ; SSE2-NEXT: jne .LBB1_1 ; SSE2-NEXT: # BB#2: # %middle.block ; SSE2-NEXT: paddd %xmm15, %xmm4 -; SSE2-NEXT: paddd %xmm6, %xmm1 +; SSE2-NEXT: paddd %xmm14, %xmm1 ; SSE2-NEXT: paddd %xmm13, %xmm0 -; SSE2-NEXT: paddd %xmm14, %xmm2 +; SSE2-NEXT: paddd %xmm5, %xmm2 ; SSE2-NEXT: paddd %xmm4, %xmm1 ; SSE2-NEXT: paddd %xmm2, %xmm1 ; SSE2-NEXT: paddd %xmm0, %xmm1 diff --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll b/llvm/test/CodeGen/X86/vector-half-conversions.ll index d64b37c2ffce..57705ad71011 100644 --- a/llvm/test/CodeGen/X86/vector-half-conversions.ll +++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll @@ -3350,69 +3350,69 @@ define <16 x i16> @cvt_16f32_to_16i16(<16 x float> %a0) nounwind { ; ; AVX512F-LABEL: cvt_16f32_to_16i16: ; AVX512F: # BB#0: -; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm1 +; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm2 +; AVX512F-NEXT: vmovd %xmm2, %eax +; AVX512F-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] +; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm2 +; AVX512F-NEXT: vmovd %eax, %xmm3 +; AVX512F-NEXT: vmovd %xmm2, %eax ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] -; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm14 -; AVX512F-NEXT: vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3] -; AVX512F-NEXT: vcvtps2ph $4, %zmm3, %ymm3 -; AVX512F-NEXT: vpermilps {{.*#+}} xmm4 = xmm0[3,1,2,3] -; AVX512F-NEXT: vcvtps2ph $4, %zmm4, %ymm4 -; AVX512F-NEXT: vpermilpd {{.*#+}} xmm5 = xmm0[1,0] -; AVX512F-NEXT: vcvtps2ph $4, %zmm5, %ymm5 -; AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm7 -; AVX512F-NEXT: vextractf128 $1, %ymm7, %xmm8 -; AVX512F-NEXT: vpermilps {{.*#+}} xmm6 = xmm8[3,1,2,3] -; AVX512F-NEXT: vcvtps2ph $4, %zmm6, %ymm6 -; AVX512F-NEXT: vpermilpd {{.*#+}} xmm9 = xmm8[1,0] -; AVX512F-NEXT: vcvtps2ph $4, %zmm9, %ymm9 -; AVX512F-NEXT: vmovshdup {{.*#+}} xmm10 = xmm8[1,1,3,3] -; AVX512F-NEXT: vcvtps2ph $4, %zmm10, %ymm10 -; AVX512F-NEXT: vcvtps2ph $4, %zmm8, %ymm8 -; AVX512F-NEXT: vpermilps {{.*#+}} xmm11 = xmm7[3,1,2,3] -; AVX512F-NEXT: vcvtps2ph $4, %zmm11, %ymm11 -; AVX512F-NEXT: vpermilpd {{.*#+}} xmm12 = xmm7[1,0] -; AVX512F-NEXT: vcvtps2ph $4, %zmm12, %ymm12 -; AVX512F-NEXT: vcvtps2ph $4, %zmm7, %ymm13 -; AVX512F-NEXT: vmovd %xmm13, %eax -; AVX512F-NEXT: vmovshdup {{.*#+}} xmm7 = xmm7[1,1,3,3] -; AVX512F-NEXT: vcvtps2ph $4, %zmm7, %ymm7 -; AVX512F-NEXT: vmovd %eax, %xmm2 -; AVX512F-NEXT: vmovd %xmm7, %eax -; AVX512F-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: vmovd %xmm12, %eax -; AVX512F-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: vmovd %xmm11, %eax -; AVX512F-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: vmovd %xmm8, %eax -; AVX512F-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: vmovd %xmm10, %eax -; AVX512F-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: vmovd %xmm9, %eax -; AVX512F-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: vmovd %xmm6, %eax -; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm6 -; AVX512F-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; AVX512F-NEXT: vmovd %xmm6, %eax -; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0 -; AVX512F-NEXT: vmovd %eax, %xmm6 -; AVX512F-NEXT: vmovd %xmm0, %eax -; AVX512F-NEXT: vpinsrw $1, %eax, %xmm6, %xmm0 -; AVX512F-NEXT: vmovd %xmm5, %eax -; AVX512F-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: vmovd %xmm4, %eax -; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm4 -; AVX512F-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: vmovd %xmm4, %eax -; AVX512F-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: vmovd %xmm3, %eax -; AVX512F-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: vmovd %xmm14, %eax +; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm2 +; AVX512F-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: vmovd %xmm2, %eax +; AVX512F-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 -; AVX512F-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; AVX512F-NEXT: vmovd %xmm1, %eax -; AVX512F-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm1 +; AVX512F-NEXT: vpinsrw $3, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: vmovd %xmm1, %eax +; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] +; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 +; AVX512F-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: vmovd %xmm1, %eax +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0] +; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 +; AVX512F-NEXT: vpinsrw $5, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: vmovd %xmm1, %eax +; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm1 +; AVX512F-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] +; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm2 +; AVX512F-NEXT: vpinsrw $6, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: vmovd %xmm2, %eax +; AVX512F-NEXT: vpinsrw $7, %eax, %xmm3, %xmm2 +; AVX512F-NEXT: vmovd %xmm1, %eax +; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 +; AVX512F-NEXT: vmovd %eax, %xmm3 +; AVX512F-NEXT: vmovd %xmm1, %eax +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 +; AVX512F-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: vmovd %xmm1, %eax +; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] +; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0 +; AVX512F-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: vmovd %xmm0, %eax +; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm0 +; AVX512F-NEXT: vpinsrw $3, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: vmovd %xmm0, %eax +; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3] +; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0 +; AVX512F-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: vmovd %xmm0, %eax +; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0] +; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0 +; AVX512F-NEXT: vpinsrw $5, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: vmovd %xmm0, %eax +; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[3,1,2,3] +; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0 +; AVX512F-NEXT: vpinsrw $6, %eax, %xmm3, %xmm1 +; AVX512F-NEXT: vmovd %xmm0, %eax +; AVX512F-NEXT: vpinsrw $7, %eax, %xmm1, %xmm0 ; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; AVX512F-NEXT: retq ;