[AVX-512] Fix a bad use of a high GR8 register after copying from a mask register during fast isel. Reading the high register ends up extracting bits 15:8 of the copied value instead of the lower bits of the mask.

I'm pretty sure there are more problems lurking here. But I think this fixes PR32241.

I've added the test case from that bug and added asserts that will fail if we ever try to copy between high registers and mask registers again.

llvm-svn: 297574
This commit is contained in:
Craig Topper 2017-03-12 03:37:37 +00:00
parent e726cd0cd1
commit 58647b16e5
3 changed files with 24 additions and 5 deletions

View File

@ -1559,6 +1559,17 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
// Handle zero-extension from i1 to i8, which is common. // Handle zero-extension from i1 to i8, which is common.
MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType()); MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
if (SrcVT == MVT::i1) { if (SrcVT == MVT::i1) {
if (!Subtarget->is64Bit()) {
// If this isn't a 64-bit target, we need to constrain the reg class
// to avoid high registers here; otherwise we might use a high register
// to copy from a mask register.
unsigned OldReg = ResultReg;
ResultReg = createResultReg(&X86::GR8_ABCD_LRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(OldReg);
}
// Set the high bits to zero. // Set the high bits to zero.
ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false); ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
SrcVT = MVT::i8; SrcVT = MVT::i8;

View File

@ -6325,6 +6325,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg,
return X86::KMOVWrk; return X86::KMOVWrk;
} }
if (X86::GR8RegClass.contains(DestReg)) { if (X86::GR8RegClass.contains(DestReg)) {
assert(!isHReg(DestReg) && "Cannot move between mask and h-reg");
DestReg = getX86SubSuperRegister(DestReg, 32); DestReg = getX86SubSuperRegister(DestReg, 32);
return Subtarget.hasDQI() ? X86::KMOVBrk : X86::KMOVWrk; return Subtarget.hasDQI() ? X86::KMOVBrk : X86::KMOVWrk;
} }
@ -6348,6 +6349,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg,
return X86::KMOVWkr; return X86::KMOVWkr;
} }
if (X86::GR8RegClass.contains(SrcReg)) { if (X86::GR8RegClass.contains(SrcReg)) {
assert(!isHReg(SrcReg) && "Cannot move between mask and h-reg");
SrcReg = getX86SubSuperRegister(SrcReg, 32); SrcReg = getX86SubSuperRegister(SrcReg, 32);
return Subtarget.hasDQI() ? X86::KMOVBkr : X86::KMOVWkr; return Subtarget.hasDQI() ? X86::KMOVBkr : X86::KMOVWkr;
} }

View File

@ -4,9 +4,14 @@
define i32 @_Z3foov() { define i32 @_Z3foov() {
; CHECK-LABEL: _Z3foov: ; CHECK-LABEL: _Z3foov:
; CHECK: # BB#0: # %entry ; CHECK: # BB#0: # %entry
; CHECK-NEXT: subl $24, %esp ; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: .Lcfi0: ; CHECK-NEXT: .Lcfi0:
; CHECK-NEXT: .cfi_def_cfa_offset 28 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: subl $24, %esp
; CHECK-NEXT: .Lcfi1:
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .Lcfi2:
; CHECK-NEXT: .cfi_offset %ebx, -8
; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: movw $10959, {{[0-9]+}}(%esp) # imm = 0x2ACF ; CHECK-NEXT: movw $10959, {{[0-9]+}}(%esp) # imm = 0x2ACF
; CHECK-NEXT: movw $-15498, {{[0-9]+}}(%esp) # imm = 0xC376 ; CHECK-NEXT: movw $-15498, {{[0-9]+}}(%esp) # imm = 0xC376
@ -35,9 +40,9 @@ define i32 @_Z3foov() {
; CHECK-NEXT: movb %ah, %cl ; CHECK-NEXT: movb %ah, %cl
; CHECK-NEXT: andl $1, %ecx ; CHECK-NEXT: andl $1, %ecx
; CHECK-NEXT: kmovw %ecx, %k0 ; CHECK-NEXT: kmovw %ecx, %k0
; CHECK-NEXT: kmovb %k0, %eax ; CHECK-NEXT: kmovb %k0, %ebx
; CHECK-NEXT: andb $1, %ah ; CHECK-NEXT: andb $1, %bl
; CHECK-NEXT: movzbl %ah, %ecx ; CHECK-NEXT: movzbl %bl, %ecx
; CHECK-NEXT: xorl $-1, %ecx ; CHECK-NEXT: xorl $-1, %ecx
; CHECK-NEXT: cmpl $0, %ecx ; CHECK-NEXT: cmpl $0, %ecx
; CHECK-NEXT: kmovb %eax, %k0 ; CHECK-NEXT: kmovb %eax, %k0
@ -58,6 +63,7 @@ define i32 @_Z3foov() {
; CHECK-NEXT: movw %cx, {{[0-9]+}}(%esp) ; CHECK-NEXT: movw %cx, {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: addl $24, %esp ; CHECK-NEXT: addl $24, %esp
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: retl ; CHECK-NEXT: retl
entry: entry:
%aa = alloca i16, align 2 %aa = alloca i16, align 2