forked from OSchip/llvm-project
[AVX-512] Fix a bad use of a high GR8 register after copying from a mask register during fast isel. Using the high register ends up extracting from bits 15:8 instead of the lower bits of the mask.
I'm pretty sure there are more problems lurking here. But I think this fixes PR32241. I've added the test case from that bug and added asserts that will fail if we ever try to copy between high registers and mask registers again. llvm-svn: 297574
This commit is contained in:
parent
e726cd0cd1
commit
58647b16e5
|
@ -1559,6 +1559,17 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
|
||||||
// Handle zero-extension from i1 to i8, which is common.
|
// Handle zero-extension from i1 to i8, which is common.
|
||||||
MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
|
MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
|
||||||
if (SrcVT == MVT::i1) {
|
if (SrcVT == MVT::i1) {
|
||||||
|
if (!Subtarget->is64Bit()) {
|
||||||
|
// If this isn't a 64-bit target we need to constrain the reg class
|
||||||
|
// to avoid high registers here otherwise we might use a high register
|
||||||
|
// to copy from a mask register.
|
||||||
|
unsigned OldReg = ResultReg;
|
||||||
|
ResultReg = createResultReg(&X86::GR8_ABCD_LRegClass);
|
||||||
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
||||||
|
TII.get(TargetOpcode::COPY), ResultReg)
|
||||||
|
.addReg(OldReg);
|
||||||
|
}
|
||||||
|
|
||||||
// Set the high bits to zero.
|
// Set the high bits to zero.
|
||||||
ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
|
ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
|
||||||
SrcVT = MVT::i8;
|
SrcVT = MVT::i8;
|
||||||
|
|
|
@ -6325,6 +6325,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg,
|
||||||
return X86::KMOVWrk;
|
return X86::KMOVWrk;
|
||||||
}
|
}
|
||||||
if (X86::GR8RegClass.contains(DestReg)) {
|
if (X86::GR8RegClass.contains(DestReg)) {
|
||||||
|
assert(!isHReg(DestReg) && "Cannot move between mask and h-reg");
|
||||||
DestReg = getX86SubSuperRegister(DestReg, 32);
|
DestReg = getX86SubSuperRegister(DestReg, 32);
|
||||||
return Subtarget.hasDQI() ? X86::KMOVBrk : X86::KMOVWrk;
|
return Subtarget.hasDQI() ? X86::KMOVBrk : X86::KMOVWrk;
|
||||||
}
|
}
|
||||||
|
@ -6348,6 +6349,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg,
|
||||||
return X86::KMOVWkr;
|
return X86::KMOVWkr;
|
||||||
}
|
}
|
||||||
if (X86::GR8RegClass.contains(SrcReg)) {
|
if (X86::GR8RegClass.contains(SrcReg)) {
|
||||||
|
assert(!isHReg(SrcReg) && "Cannot move between mask and h-reg");
|
||||||
SrcReg = getX86SubSuperRegister(SrcReg, 32);
|
SrcReg = getX86SubSuperRegister(SrcReg, 32);
|
||||||
return Subtarget.hasDQI() ? X86::KMOVBkr : X86::KMOVWkr;
|
return Subtarget.hasDQI() ? X86::KMOVBkr : X86::KMOVWkr;
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,9 +4,14 @@
|
||||||
define i32 @_Z3foov() {
|
define i32 @_Z3foov() {
|
||||||
; CHECK-LABEL: _Z3foov:
|
; CHECK-LABEL: _Z3foov:
|
||||||
; CHECK: # BB#0: # %entry
|
; CHECK: # BB#0: # %entry
|
||||||
; CHECK-NEXT: subl $24, %esp
|
; CHECK-NEXT: pushl %ebx
|
||||||
; CHECK-NEXT: .Lcfi0:
|
; CHECK-NEXT: .Lcfi0:
|
||||||
; CHECK-NEXT: .cfi_def_cfa_offset 28
|
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||||
|
; CHECK-NEXT: subl $24, %esp
|
||||||
|
; CHECK-NEXT: .Lcfi1:
|
||||||
|
; CHECK-NEXT: .cfi_def_cfa_offset 32
|
||||||
|
; CHECK-NEXT: .Lcfi2:
|
||||||
|
; CHECK-NEXT: .cfi_offset %ebx, -8
|
||||||
; CHECK-NEXT: movb $1, %al
|
; CHECK-NEXT: movb $1, %al
|
||||||
; CHECK-NEXT: movw $10959, {{[0-9]+}}(%esp) # imm = 0x2ACF
|
; CHECK-NEXT: movw $10959, {{[0-9]+}}(%esp) # imm = 0x2ACF
|
||||||
; CHECK-NEXT: movw $-15498, {{[0-9]+}}(%esp) # imm = 0xC376
|
; CHECK-NEXT: movw $-15498, {{[0-9]+}}(%esp) # imm = 0xC376
|
||||||
|
@ -35,9 +40,9 @@ define i32 @_Z3foov() {
|
||||||
; CHECK-NEXT: movb %ah, %cl
|
; CHECK-NEXT: movb %ah, %cl
|
||||||
; CHECK-NEXT: andl $1, %ecx
|
; CHECK-NEXT: andl $1, %ecx
|
||||||
; CHECK-NEXT: kmovw %ecx, %k0
|
; CHECK-NEXT: kmovw %ecx, %k0
|
||||||
; CHECK-NEXT: kmovb %k0, %eax
|
; CHECK-NEXT: kmovb %k0, %ebx
|
||||||
; CHECK-NEXT: andb $1, %ah
|
; CHECK-NEXT: andb $1, %bl
|
||||||
; CHECK-NEXT: movzbl %ah, %ecx
|
; CHECK-NEXT: movzbl %bl, %ecx
|
||||||
; CHECK-NEXT: xorl $-1, %ecx
|
; CHECK-NEXT: xorl $-1, %ecx
|
||||||
; CHECK-NEXT: cmpl $0, %ecx
|
; CHECK-NEXT: cmpl $0, %ecx
|
||||||
; CHECK-NEXT: kmovb %eax, %k0
|
; CHECK-NEXT: kmovb %eax, %k0
|
||||||
|
@ -58,6 +63,7 @@ define i32 @_Z3foov() {
|
||||||
; CHECK-NEXT: movw %cx, {{[0-9]+}}(%esp)
|
; CHECK-NEXT: movw %cx, {{[0-9]+}}(%esp)
|
||||||
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||||
; CHECK-NEXT: addl $24, %esp
|
; CHECK-NEXT: addl $24, %esp
|
||||||
|
; CHECK-NEXT: popl %ebx
|
||||||
; CHECK-NEXT: retl
|
; CHECK-NEXT: retl
|
||||||
entry:
|
entry:
|
||||||
%aa = alloca i16, align 2
|
%aa = alloca i16, align 2
|
||||||
|
|
Loading…
Reference in New Issue