[X86] Replace (32/64 - n) shift amounts with (neg n) since the shift amount is masked in hardware

Inspired by what AArch64 does for shifts, this patch attempts to replace shift amounts with neg if we can.

This is done directly as part of isel, so it's as late as possible, to avoid breaking some BZHI patterns, since those patterns need an unmasked (32-n) to be correct.

To avoid manual load folding and custom instruction selection for the negate, I've inserted new nodes in the DAG above the shift node in topological order.

Differential Revision: https://reviews.llvm.org/D48789

llvm-svn: 340441
This commit is contained in:
Craig Topper 2018-08-22 19:39:09 +00:00
parent f8681cea87
commit 538f8ab438
4 changed files with 255 additions and 147 deletions

View File

@ -456,6 +456,7 @@ namespace {
bool matchBEXTRFromAnd(SDNode *Node);
bool shrinkAndImmediate(SDNode *N);
bool isMaskZeroExtended(SDNode *N) const;
bool tryShiftAmountMod(SDNode *N);
MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
const SDLoc &dl, MVT VT, SDNode *Node);
@ -2690,6 +2691,102 @@ MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
return CNode;
}
/// Try to simplify the shift-amount operand of the scalar shift \p N before
/// the node goes through normal instruction selection.
///
/// With Size being 64 for i64 shifts and 32 for every other scalar type, two
/// shapes of shift amount are rewritten (optionally looking through one
/// TRUNCATE):
///   * (X + C) or (X - C) with C == 0 mod Size  -->  X
///   * (C - X) with C != 0 and C == 0 mod Size  -->  (0 - X)
/// The new amount is truncated to i8 when necessary and ANDed with Size - 1.
///
/// \returns true if \p N was handled here (either replaced by an equivalent
/// existing node or passed to SelectCode), false if nothing was changed and
/// the caller should continue with its own selection.
bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
  EVT ShiftVT = N->getValueType(0);

  // Vector shifts are out of scope.
  if (ShiftVT.isVector())
    return false;

  // Narrower shifts only mask to 5 bits in hardware, so everything that is
  // not an i64 shift is treated as modulo 32.
  unsigned Modulus = (ShiftVT == MVT::i64) ? 64 : 32;

  SDLoc DL(N);
  SDValue OldAmt = N->getOperand(1);

  // Look through a truncate feeding the shift amount.
  SDValue Amt = OldAmt;
  if (Amt.getOpcode() == ISD::TRUNCATE)
    Amt = Amt.getOperand(0);
  unsigned AmtOpc = Amt.getOpcode();

  // Special case to avoid messing up a BZHI pattern: leave
  // (srl (shl X, (size - y)), (size - y)) untouched on BMI2 targets.
  if (Subtarget->hasBMI2() && (ShiftVT == MVT::i32 || ShiftVT == MVT::i64) &&
      N->getOpcode() == ISD::SRL && AmtOpc == ISD::SUB) {
    SDValue Shifted = N->getOperand(0);
    // Both shifts must use the very same amount value.
    if (Shifted.getOpcode() == ISD::SHL && OldAmt == Shifted.getOperand(1)) {
      // And that amount must be exactly (size - y).
      SDValue Minuend = Amt.getOperand(0);
      auto *MinuendC = dyn_cast<ConstantSDNode>(Minuend);
      if (MinuendC && MinuendC->getZExtValue() == Modulus)
        return false;
    }
  }

  // Only ADD and SUB amounts can be simplified.
  if (AmtOpc != ISD::ADD && AmtOpc != ISD::SUB)
    return false;

  SDValue LHS = Amt.getOperand(0);
  SDValue RHS = Amt.getOperand(1);
  auto *LHSC = dyn_cast<ConstantSDNode>(LHS);
  auto *RHSC = dyn_cast<ConstantSDNode>(RHS);

  SDValue NewAmt;
  if (RHSC && RHSC->getZExtValue() % Modulus == 0) {
    // Shifting by X+/-N where N == 0 mod Size: shift by X and drop the
    // ADD/SUB.
    NewAmt = LHS;
  } else if (AmtOpc == ISD::SUB && LHSC && LHSC->getZExtValue() != 0 &&
             LHSC->getZExtValue() % Modulus == 0) {
    // Shifting by N-X where N == 0 mod Size: shift by -X so that a NEG is
    // generated instead of a SUB from a constant.
    // TODO: This isn't guaranteed to replace the sub if there is a logic cone
    // that uses it that's not a shift.
    EVT AmtVT = Amt.getValueType();
    SDValue Zero = CurDAG->getConstant(0, DL, AmtVT);
    NewAmt = CurDAG->getNode(ISD::SUB, DL, AmtVT, Zero, RHS);

    // Place the new nodes at a valid topological position so they get
    // selected independently.
    insertDAGNode(*CurDAG, OldAmt, Zero);
    insertDAGNode(*CurDAG, OldAmt, NewAmt);
  } else {
    return false;
  }

  // The amount is truncated to i8 if it is not i8 already.
  if (NewAmt.getValueType() != MVT::i8) {
    NewAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewAmt);
    // Keep the topological ordering correct.
    insertDAGNode(*CurDAG, OldAmt, NewAmt);
  }

  // Insert a new mask to keep the shift amount legal. This should be removed
  // by isel patterns.
  SDValue AmtMask = CurDAG->getConstant(Modulus - 1, DL, MVT::i8);
  NewAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewAmt, AmtMask);
  // Keep the topological ordering correct.
  insertDAGNode(*CurDAG, OldAmt, NewAmt);

  SDNode *Updated = CurDAG->UpdateNodeOperands(N, N->getOperand(0), NewAmt);
  if (Updated != N) {
    // An equivalent node already existed: replace ourselves with it and let
    // it be selected after its other users.
    ReplaceNode(N, Updated);
    return true;
  }

  // If the original shift amount is now dead, delete it so that it is not run
  // through isel.
  if (OldAmt.getNode()->use_empty())
    CurDAG->RemoveDeadNode(OldAmt.getNode());

  // With the amount optimized, defer to normal isel to get load folding and
  // legacy vs BMI2 selection without repeating it here.
  SelectCode(N);
  return true;
}
/// If the high bits of an 'and' operand are known zero, try setting the
/// high bits of an 'and' constant operand to produce a smaller encoding by
/// creating a small, sign-extended negative immediate rather than a large
@ -2820,6 +2917,13 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
return;
}
case ISD::SRL:
case ISD::SRA:
case ISD::SHL:
if (tryShiftAmountMod(Node))
return;
break;
case ISD::AND:
if (matchBEXTRFromAnd(Node))
return;

View File

@ -1108,7 +1108,7 @@ define i32 @clear_lowbits32_ic0(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits32_ic0:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movl $32, %ecx
; X86-NOBMI2-NEXT: xorl %ecx, %ecx
; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
@ -1117,7 +1117,7 @@ define i32 @clear_lowbits32_ic0(i32 %val, i32 %numlowbits) nounwind {
;
; X86-BMI2-LABEL: clear_lowbits32_ic0:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl $32, %eax
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
@ -1125,8 +1125,8 @@ define i32 @clear_lowbits32_ic0(i32 %val, i32 %numlowbits) nounwind {
;
; X64-NOBMI2-LABEL: clear_lowbits32_ic0:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl $32, %ecx
; X64-NOBMI2-NEXT: subl %esi, %ecx
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: negl %ecx
; X64-NOBMI2-NEXT: shrl %cl, %edi
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %edi
@ -1135,10 +1135,9 @@ define i32 @clear_lowbits32_ic0(i32 %val, i32 %numlowbits) nounwind {
;
; X64-BMI2-LABEL: clear_lowbits32_ic0:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $32, %eax
; X64-BMI2-NEXT: subl %esi, %eax
; X64-BMI2-NEXT: shrxl %eax, %edi, %ecx
; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X64-BMI2-NEXT: negl %esi
; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: retq
%numhighbits = sub i32 32, %numlowbits
%mask = shl i32 -1, %numhighbits
@ -1150,15 +1149,16 @@ define i32 @clear_lowbits32_ic1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits32_ic1_indexzext:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movb $32, %cl
; X86-NOBMI2-NEXT: xorl %ecx, %ecx
; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits32_ic1_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb $32, %al
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
@ -1166,19 +1166,19 @@ define i32 @clear_lowbits32_ic1_indexzext(i32 %val, i8 %numlowbits) nounwind {
;
; X64-NOBMI2-LABEL: clear_lowbits32_ic1_indexzext:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movb $32, %cl
; X64-NOBMI2-NEXT: subb %sil, %cl
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: negb %cl
; X64-NOBMI2-NEXT: shrl %cl, %edi
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %edi
; X64-NOBMI2-NEXT: movl %edi, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits32_ic1_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movb $32, %al
; X64-BMI2-NEXT: subb %sil, %al
; X64-BMI2-NEXT: shrxl %eax, %edi, %ecx
; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X64-BMI2-NEXT: negb %sil
; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: retq
%numhighbits = sub i8 32, %numlowbits
%sh_prom = zext i8 %numhighbits to i32
@ -1192,7 +1192,7 @@ define i32 @clear_lowbits32_ic2_load(i32* %w, i32 %numlowbits) nounwind {
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movl (%eax), %eax
; X86-NOBMI2-NEXT: movl $32, %ecx
; X86-NOBMI2-NEXT: xorl %ecx, %ecx
; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
@ -1202,7 +1202,7 @@ define i32 @clear_lowbits32_ic2_load(i32* %w, i32 %numlowbits) nounwind {
; X86-BMI2-LABEL: clear_lowbits32_ic2_load:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl $32, %ecx
; X86-BMI2-NEXT: xorl %ecx, %ecx
; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, (%eax), %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
@ -1210,9 +1210,9 @@ define i32 @clear_lowbits32_ic2_load(i32* %w, i32 %numlowbits) nounwind {
;
; X64-NOBMI2-LABEL: clear_lowbits32_ic2_load:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movl (%rdi), %eax
; X64-NOBMI2-NEXT: movl $32, %ecx
; X64-NOBMI2-NEXT: subl %esi, %ecx
; X64-NOBMI2-NEXT: negl %ecx
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
@ -1220,10 +1220,9 @@ define i32 @clear_lowbits32_ic2_load(i32* %w, i32 %numlowbits) nounwind {
;
; X64-BMI2-LABEL: clear_lowbits32_ic2_load:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $32, %eax
; X64-BMI2-NEXT: subl %esi, %eax
; X64-BMI2-NEXT: shrxl %eax, (%rdi), %ecx
; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X64-BMI2-NEXT: negl %esi
; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: retq
%val = load i32, i32* %w
%numhighbits = sub i32 32, %numlowbits
@ -1237,16 +1236,17 @@ define i32 @clear_lowbits32_ic3_load_indexzext(i32* %w, i8 %numlowbits) nounwind
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movl (%eax), %eax
; X86-NOBMI2-NEXT: movb $32, %cl
; X86-NOBMI2-NEXT: xorl %ecx, %ecx
; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits32_ic3_load_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movb $32, %cl
; X86-BMI2-NEXT: xorl %ecx, %ecx
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: shrxl %ecx, (%eax), %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
@ -1254,19 +1254,19 @@ define i32 @clear_lowbits32_ic3_load_indexzext(i32* %w, i8 %numlowbits) nounwind
;
; X64-NOBMI2-LABEL: clear_lowbits32_ic3_load_indexzext:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movl (%rdi), %eax
; X64-NOBMI2-NEXT: movb $32, %cl
; X64-NOBMI2-NEXT: subb %sil, %cl
; X64-NOBMI2-NEXT: negb %cl
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits32_ic3_load_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movb $32, %al
; X64-BMI2-NEXT: subb %sil, %al
; X64-BMI2-NEXT: shrxl %eax, (%rdi), %ecx
; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X64-BMI2-NEXT: negb %sil
; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: retq
%val = load i32, i32* %w
%numhighbits = sub i8 32, %numlowbits
@ -1280,7 +1280,7 @@ define i32 @clear_lowbits32_ic4_commutative(i32 %val, i32 %numlowbits) nounwind
; X86-NOBMI2-LABEL: clear_lowbits32_ic4_commutative:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movl $32, %ecx
; X86-NOBMI2-NEXT: xorl %ecx, %ecx
; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
@ -1289,7 +1289,7 @@ define i32 @clear_lowbits32_ic4_commutative(i32 %val, i32 %numlowbits) nounwind
;
; X86-BMI2-LABEL: clear_lowbits32_ic4_commutative:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl $32, %eax
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
@ -1297,8 +1297,8 @@ define i32 @clear_lowbits32_ic4_commutative(i32 %val, i32 %numlowbits) nounwind
;
; X64-NOBMI2-LABEL: clear_lowbits32_ic4_commutative:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl $32, %ecx
; X64-NOBMI2-NEXT: subl %esi, %ecx
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: negl %ecx
; X64-NOBMI2-NEXT: shrl %cl, %edi
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %edi
@ -1307,10 +1307,9 @@ define i32 @clear_lowbits32_ic4_commutative(i32 %val, i32 %numlowbits) nounwind
;
; X64-BMI2-LABEL: clear_lowbits32_ic4_commutative:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $32, %eax
; X64-BMI2-NEXT: subl %esi, %eax
; X64-BMI2-NEXT: shrxl %eax, %edi, %ecx
; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X64-BMI2-NEXT: negl %esi
; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: retq
%numhighbits = sub i32 32, %numlowbits
%mask = shl i32 -1, %numhighbits
@ -1358,20 +1357,19 @@ define i64 @clear_lowbits64_ic0(i64 %val, i64 %numlowbits) nounwind {
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic0:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl $64, %ecx
; X64-NOBMI2-NEXT: subl %esi, %ecx
; X64-NOBMI2-NEXT: movq %rsi, %rcx
; X64-NOBMI2-NEXT: negl %ecx
; X64-NOBMI2-NEXT: shrq %cl, %rdi
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI2-NEXT: shlq %cl, %rdi
; X64-NOBMI2-NEXT: movq %rdi, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits64_ic0:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $64, %eax
; X64-BMI2-NEXT: subl %esi, %eax
; X64-BMI2-NEXT: shrxq %rax, %rdi, %rcx
; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax
; X64-BMI2-NEXT: negl %esi
; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
%numhighbits = sub i64 64, %numlowbits
%mask = shl i64 -1, %numhighbits
@ -1417,19 +1415,20 @@ define i64 @clear_lowbits64_ic1_indexzext(i64 %val, i8 %numlowbits) nounwind {
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic1_indexzext:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movb $64, %cl
; X64-NOBMI2-NEXT: subb %sil, %cl
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: negb %cl
; X64-NOBMI2-NEXT: shrq %cl, %rdi
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shlq %cl, %rdi
; X64-NOBMI2-NEXT: movq %rdi, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits64_ic1_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movb $64, %al
; X64-BMI2-NEXT: subb %sil, %al
; X64-BMI2-NEXT: shrxq %rax, %rdi, %rcx
; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax
; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi
; X64-BMI2-NEXT: negb %sil
; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
%numhighbits = sub i8 64, %numlowbits
%sh_prom = zext i8 %numhighbits to i64
@ -1482,20 +1481,19 @@ define i64 @clear_lowbits64_ic2_load(i64* %w, i64 %numlowbits) nounwind {
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic2_load:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movq %rsi, %rcx
; X64-NOBMI2-NEXT: movq (%rdi), %rax
; X64-NOBMI2-NEXT: movl $64, %ecx
; X64-NOBMI2-NEXT: subl %esi, %ecx
; X64-NOBMI2-NEXT: negl %ecx
; X64-NOBMI2-NEXT: shrq %cl, %rax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits64_ic2_load:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $64, %eax
; X64-BMI2-NEXT: subl %esi, %eax
; X64-BMI2-NEXT: shrxq %rax, (%rdi), %rcx
; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax
; X64-BMI2-NEXT: negl %esi
; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
%val = load i64, i64* %w
%numhighbits = sub i64 64, %numlowbits
@ -1548,19 +1546,20 @@ define i64 @clear_lowbits64_ic3_load_indexzext(i64* %w, i8 %numlowbits) nounwind
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic3_load_indexzext:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movq (%rdi), %rax
; X64-NOBMI2-NEXT: movb $64, %cl
; X64-NOBMI2-NEXT: subb %sil, %cl
; X64-NOBMI2-NEXT: negb %cl
; X64-NOBMI2-NEXT: shrq %cl, %rax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits64_ic3_load_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movb $64, %al
; X64-BMI2-NEXT: subb %sil, %al
; X64-BMI2-NEXT: shrxq %rax, (%rdi), %rcx
; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax
; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi
; X64-BMI2-NEXT: negb %sil
; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
%val = load i64, i64* %w
%numhighbits = sub i8 64, %numlowbits
@ -1608,20 +1607,19 @@ define i64 @clear_lowbits64_ic4_commutative(i64 %val, i64 %numlowbits) nounwind
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic4_commutative:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl $64, %ecx
; X64-NOBMI2-NEXT: subl %esi, %ecx
; X64-NOBMI2-NEXT: movq %rsi, %rcx
; X64-NOBMI2-NEXT: negl %ecx
; X64-NOBMI2-NEXT: shrq %cl, %rdi
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI2-NEXT: shlq %cl, %rdi
; X64-NOBMI2-NEXT: movq %rdi, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits64_ic4_commutative:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $64, %eax
; X64-BMI2-NEXT: subl %esi, %eax
; X64-BMI2-NEXT: shrxq %rax, %rdi, %rcx
; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax
; X64-BMI2-NEXT: negl %esi
; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
%numhighbits = sub i64 64, %numlowbits
%mask = shl i64 -1, %numhighbits

View File

@ -1016,7 +1016,7 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_c0:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl $32, %ecx
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
@ -1031,8 +1031,8 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
;
; X64-NOBMI-LABEL: bzhi32_c0:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl $32, %ecx
; X64-NOBMI-NEXT: subl %esi, %ecx
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: negl %ecx
; X64-NOBMI-NEXT: shll %cl, %edi
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %edi
@ -1053,9 +1053,10 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_c1_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movb $32, %cl
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: retl
;
@ -1067,9 +1068,10 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
;
; X64-NOBMI-LABEL: bzhi32_c1_indexzext:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movb $32, %cl
; X64-NOBMI-NEXT: subb %sil, %cl
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %edi
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %edi
; X64-NOBMI-NEXT: movl %edi, %eax
; X64-NOBMI-NEXT: retq
@ -1090,7 +1092,7 @@ define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits) nounwind {
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %eax
; X86-NOBMI-NEXT: movl $32, %ecx
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
@ -1106,9 +1108,9 @@ define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits) nounwind {
;
; X64-NOBMI-LABEL: bzhi32_c2_load:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl (%rdi), %eax
; X64-NOBMI-NEXT: movl $32, %ecx
; X64-NOBMI-NEXT: subl %esi, %ecx
; X64-NOBMI-NEXT: negl %ecx
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
@ -1130,9 +1132,10 @@ define i32 @bzhi32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %eax
; X86-NOBMI-NEXT: movb $32, %cl
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: retl
;
@ -1145,10 +1148,11 @@ define i32 @bzhi32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
;
; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl (%rdi), %eax
; X64-NOBMI-NEXT: movb $32, %cl
; X64-NOBMI-NEXT: subb %sil, %cl
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
; X64-NOBMI-NEXT: retq
;
@ -1168,7 +1172,7 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_c4_commutative:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl $32, %ecx
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
@ -1183,8 +1187,8 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
;
; X64-NOBMI-LABEL: bzhi32_c4_commutative:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl $32, %ecx
; X64-NOBMI-NEXT: subl %esi, %ecx
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: negl %ecx
; X64-NOBMI-NEXT: shll %cl, %edi
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %edi
@ -1241,10 +1245,10 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
;
; X64-NOBMI-LABEL: bzhi64_c0:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl $64, %ecx
; X64-NOBMI-NEXT: subl %esi, %ecx
; X64-NOBMI-NEXT: movq %rsi, %rcx
; X64-NOBMI-NEXT: negl %ecx
; X64-NOBMI-NEXT: shlq %cl, %rdi
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rdi
; X64-NOBMI-NEXT: movq %rdi, %rax
; X64-NOBMI-NEXT: retq
@ -1297,9 +1301,10 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
;
; X64-NOBMI-LABEL: bzhi64_c1_indexzext:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movb $64, %cl
; X64-NOBMI-NEXT: subb %sil, %cl
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shlq %cl, %rdi
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrq %cl, %rdi
; X64-NOBMI-NEXT: movq %rdi, %rax
; X64-NOBMI-NEXT: retq
@ -1360,11 +1365,11 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
;
; X64-NOBMI-LABEL: bzhi64_c2_load:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movq %rsi, %rcx
; X64-NOBMI-NEXT: movq (%rdi), %rax
; X64-NOBMI-NEXT: movl $64, %ecx
; X64-NOBMI-NEXT: subl %esi, %ecx
; X64-NOBMI-NEXT: negl %ecx
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rax
; X64-NOBMI-NEXT: retq
;
@ -1423,10 +1428,11 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
;
; X64-NOBMI-LABEL: bzhi64_c3_load_indexzext:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movq (%rdi), %rax
; X64-NOBMI-NEXT: movb $64, %cl
; X64-NOBMI-NEXT: subb %sil, %cl
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrq %cl, %rax
; X64-NOBMI-NEXT: retq
;
@ -1481,10 +1487,10 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
;
; X64-NOBMI-LABEL: bzhi64_c4_commutative:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl $64, %ecx
; X64-NOBMI-NEXT: subl %esi, %ecx
; X64-NOBMI-NEXT: movq %rsi, %rcx
; X64-NOBMI-NEXT: negl %ecx
; X64-NOBMI-NEXT: shlq %cl, %rdi
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rdi
; X64-NOBMI-NEXT: movq %rdi, %rax
; X64-NOBMI-NEXT: retq
@ -1507,7 +1513,7 @@ define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_d0:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl $32, %ecx
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
@ -1522,8 +1528,8 @@ define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
;
; X64-NOBMI-LABEL: bzhi32_d0:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl $32, %ecx
; X64-NOBMI-NEXT: subl %esi, %ecx
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: negl %ecx
; X64-NOBMI-NEXT: shll %cl, %edi
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %edi
@ -1544,9 +1550,10 @@ define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_d1_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movb $32, %cl
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: retl
;
@ -1558,9 +1565,10 @@ define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
;
; X64-NOBMI-LABEL: bzhi32_d1_indexzext:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movb $32, %cl
; X64-NOBMI-NEXT: subb %sil, %cl
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %edi
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %edi
; X64-NOBMI-NEXT: movl %edi, %eax
; X64-NOBMI-NEXT: retq
@ -1581,7 +1589,7 @@ define i32 @bzhi32_d2_load(i32* %w, i32 %numlowbits) nounwind {
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %eax
; X86-NOBMI-NEXT: movl $32, %ecx
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
@ -1597,9 +1605,9 @@ define i32 @bzhi32_d2_load(i32* %w, i32 %numlowbits) nounwind {
;
; X64-NOBMI-LABEL: bzhi32_d2_load:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl (%rdi), %eax
; X64-NOBMI-NEXT: movl $32, %ecx
; X64-NOBMI-NEXT: subl %esi, %ecx
; X64-NOBMI-NEXT: negl %ecx
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
@ -1621,9 +1629,10 @@ define i32 @bzhi32_d3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %eax
; X86-NOBMI-NEXT: movb $32, %cl
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: retl
;
@ -1636,10 +1645,11 @@ define i32 @bzhi32_d3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
;
; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl (%rdi), %eax
; X64-NOBMI-NEXT: movb $32, %cl
; X64-NOBMI-NEXT: subb %sil, %cl
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
; X64-NOBMI-NEXT: retq
;
@ -1731,10 +1741,10 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
;
; X64-NOBMI-LABEL: bzhi64_d0:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl $64, %ecx
; X64-NOBMI-NEXT: subl %esi, %ecx
; X64-NOBMI-NEXT: movq %rsi, %rcx
; X64-NOBMI-NEXT: negl %ecx
; X64-NOBMI-NEXT: shlq %cl, %rdi
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rdi
; X64-NOBMI-NEXT: movq %rdi, %rax
; X64-NOBMI-NEXT: retq
@ -1823,9 +1833,10 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
;
; X64-NOBMI-LABEL: bzhi64_d1_indexzext:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movb $64, %cl
; X64-NOBMI-NEXT: subb %sil, %cl
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shlq %cl, %rdi
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrq %cl, %rdi
; X64-NOBMI-NEXT: movq %rdi, %rax
; X64-NOBMI-NEXT: retq
@ -1918,11 +1929,11 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind {
;
; X64-NOBMI-LABEL: bzhi64_d2_load:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movq %rsi, %rcx
; X64-NOBMI-NEXT: movq (%rdi), %rax
; X64-NOBMI-NEXT: movl $64, %ecx
; X64-NOBMI-NEXT: subl %esi, %ecx
; X64-NOBMI-NEXT: negl %ecx
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rax
; X64-NOBMI-NEXT: retq
;
@ -2013,10 +2024,11 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
;
; X64-NOBMI-LABEL: bzhi64_d3_load_indexzext:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movq (%rdi), %rax
; X64-NOBMI-NEXT: movb $64, %cl
; X64-NOBMI-NEXT: subb %sil, %cl
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrq %cl, %rax
; X64-NOBMI-NEXT: retq
;

View File

@ -162,11 +162,10 @@ define i64 @lshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone {
;
; BTVER2-LABEL: lshift_cl:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50]
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BTVER2-NEXT: shlq %cl, %rdi # sched: [1:0.50]
; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50]
; BTVER2-NEXT: subl %edx, %ecx # sched: [1:0.50]
; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx
; BTVER2-NEXT: negl %ecx # sched: [1:0.50]
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shrq %cl, %rsi # sched: [1:0.50]
; BTVER2-NEXT: orq %rdi, %rsi # sched: [1:0.50]
; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50]
@ -174,11 +173,10 @@ define i64 @lshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone {
;
; BDVER1-LABEL: lshift_cl:
; BDVER1: # %bb.0: # %entry
; BDVER1-NEXT: movl %edx, %ecx
; BDVER1-NEXT: movq %rdx, %rcx
; BDVER1-NEXT: shlq %cl, %rdi
; BDVER1-NEXT: movl $64, %ecx
; BDVER1-NEXT: subl %edx, %ecx
; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx
; BDVER1-NEXT: negl %ecx
; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER1-NEXT: shrq %cl, %rsi
; BDVER1-NEXT: orq %rdi, %rsi
; BDVER1-NEXT: movq %rsi, %rax
@ -236,11 +234,10 @@ define i64 @rshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone {
;
; BTVER2-LABEL: rshift_cl:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50]
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50]
; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50]
; BTVER2-NEXT: subl %edx, %ecx # sched: [1:0.50]
; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx
; BTVER2-NEXT: negl %ecx # sched: [1:0.50]
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shlq %cl, %rsi # sched: [1:0.50]
; BTVER2-NEXT: orq %rdi, %rsi # sched: [1:0.50]
; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50]
@ -248,11 +245,10 @@ define i64 @rshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone {
;
; BDVER1-LABEL: rshift_cl:
; BDVER1: # %bb.0: # %entry
; BDVER1-NEXT: movl %edx, %ecx
; BDVER1-NEXT: movq %rdx, %rcx
; BDVER1-NEXT: shrq %cl, %rdi
; BDVER1-NEXT: movl $64, %ecx
; BDVER1-NEXT: subl %edx, %ecx
; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx
; BDVER1-NEXT: negl %ecx
; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER1-NEXT: shlq %cl, %rsi
; BDVER1-NEXT: orq %rdi, %rsi
; BDVER1-NEXT: movq %rsi, %rax
@ -310,11 +306,10 @@ define void @lshift_mem_cl(i64 %a, i64 %c) nounwind readnone {
; BTVER2-LABEL: lshift_mem_cl:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00]
; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.50]
; BTVER2-NEXT: movq %rsi, %rcx # sched: [1:0.50]
; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50]
; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50]
; BTVER2-NEXT: subl %esi, %ecx # sched: [1:0.50]
; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx
; BTVER2-NEXT: negl %ecx # sched: [1:0.50]
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50]
; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50]
; BTVER2-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
@ -322,12 +317,11 @@ define void @lshift_mem_cl(i64 %a, i64 %c) nounwind readnone {
;
; BDVER1-LABEL: lshift_mem_cl:
; BDVER1: # %bb.0: # %entry
; BDVER1-NEXT: movq %rsi, %rcx
; BDVER1-NEXT: movq {{.*}}(%rip), %rax
; BDVER1-NEXT: movl %esi, %ecx
; BDVER1-NEXT: shlq %cl, %rax
; BDVER1-NEXT: movl $64, %ecx
; BDVER1-NEXT: subl %esi, %ecx
; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx
; BDVER1-NEXT: negl %ecx
; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER1-NEXT: shrq %cl, %rdi
; BDVER1-NEXT: orq %rax, %rdi
; BDVER1-NEXT: movq %rdi, {{.*}}(%rip)