forked from OSchip/llvm-project
[X86] Replace (32/64 - n) shift amounts with (neg n) since the shift amount is masked in hardware
Inspired by what AArch64 does for shifts, this patch attempts to replace shift amounts with neg if we can. This is done directly as part of isel so it's as late as possible to avoid breaking some BZHI patterns, since those patterns need an unmasked (32-n) to be correct. To avoid manual load folding and custom instruction selection for the negate, I've inserted new nodes in the DAG above the shift node in topological order. Differential Revision: https://reviews.llvm.org/D48789 llvm-svn: 340441
This commit is contained in:
parent
f8681cea87
commit
538f8ab438
|
@ -456,6 +456,7 @@ namespace {
|
|||
bool matchBEXTRFromAnd(SDNode *Node);
|
||||
bool shrinkAndImmediate(SDNode *N);
|
||||
bool isMaskZeroExtended(SDNode *N) const;
|
||||
bool tryShiftAmountMod(SDNode *N);
|
||||
|
||||
MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
|
||||
const SDLoc &dl, MVT VT, SDNode *Node);
|
||||
|
@ -2690,6 +2691,102 @@ MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
|
|||
return CNode;
|
||||
}
|
||||
|
||||
/// Try to simplify the amount operand of a scalar shift node \p N before it
/// is handed to the generated matcher.
///
/// With Size being 64 for i64 shifts and 32 for every other scalar type, two
/// rewrites of the (possibly truncated) shift amount are attempted:
///   * X + C or X - C, where C is a constant multiple of Size, becomes X;
///   * C - X, where C is a non-zero constant multiple of Size, becomes 0 - X.
/// The replacement amount is truncated to i8 if necessary and wrapped in an
/// AND with Size - 1 before being installed on \p N.
///
/// \returns true if \p N was handled here (either selected via SelectCode or
/// replaced by an already existing equivalent node); false if nothing was
/// changed and the caller should continue with normal selection.
bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
  EVT ResultVT = N->getValueType(0);

  // Vector shifts are not handled by this transform.
  if (ResultVT.isVector())
    return false;

  // Narrower shifts only mask to 5 bits in hardware, so anything that is not
  // i64 is treated as a 32-bit shift.
  unsigned Size = 32;
  if (ResultVT == MVT::i64)
    Size = 64;

  SDValue OrigShiftAmt = N->getOperand(1);
  SDLoc DL(N);

  // Look through a truncate sitting directly on the shift amount.
  SDValue ShiftAmt = OrigShiftAmt.getOpcode() == ISD::TRUNCATE
                         ? OrigShiftAmt.getOperand(0)
                         : OrigShiftAmt;

  // Special case to avoid messing up a BZHI pattern: leave
  // (srl (shl X, (size - y)), (size - y)) alone when BMI2 is available.
  if (Subtarget->hasBMI2() && (ResultVT == MVT::i32 || ResultVT == MVT::i64) &&
      N->getOpcode() == ISD::SRL) {
    SDValue Shifted = N->getOperand(0);
    // Inner node must be a SHL using the very same shift amount, and that
    // amount must have the form (Size - y).
    if (Shifted.getOpcode() == ISD::SHL &&
        Shifted.getOperand(1) == OrigShiftAmt &&
        ShiftAmt.getOpcode() == ISD::SUB) {
      if (auto *SizeC = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0)))
        if (SizeC->getZExtValue() == Size)
          return false;
    }
  }

  // Only ADD and SUB shift amounts are interesting from here on.
  unsigned AmtOpc = ShiftAmt.getOpcode();
  if (AmtOpc != ISD::ADD && AmtOpc != ISD::SUB)
    return false;

  SDValue LHS = ShiftAmt.getOperand(0);
  SDValue RHS = ShiftAmt.getOperand(1);
  auto *LHSC = dyn_cast<ConstantSDNode>(LHS);
  auto *RHSC = dyn_cast<ConstantSDNode>(RHS);

  SDValue NewShiftAmt;
  if (RHSC && RHSC->getZExtValue() % Size == 0) {
    // Shifting by X+/-N where N == 0 mod Size: just shift by X and avoid the
    // ADD/SUB altogether.
    NewShiftAmt = LHS;
  } else if (AmtOpc == ISD::SUB && LHSC && LHSC->getZExtValue() != 0 &&
             LHSC->getZExtValue() % Size == 0) {
    // Shifting by N-X where N == 0 mod Size: shift by -X instead so that a
    // NEG is generated rather than a SUB from a constant.
    // TODO: This isn't guaranteed to replace the sub if there is a logic cone
    // that uses it that's not a shift.
    EVT SubVT = ShiftAmt.getValueType();
    SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
    SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, RHS);
    NewShiftAmt = Neg;

    // Put both new nodes into a valid topological position so they can be
    // selected independently.
    insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
    insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
  } else {
    return false;
  }

  // The shift amount operand has to be i8; truncate when the replacement is
  // of another type and keep the topological ordering correct.
  if (NewShiftAmt.getValueType() != MVT::i8) {
    NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
    insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
  }

  // Insert a new mask to keep the shift amount legal. This should be removed
  // by isel patterns.
  SDValue Mask = CurDAG->getConstant(Size - 1, DL, MVT::i8);
  NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt, Mask);
  // Place the mask node in a correct topological position as well.
  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);

  SDNode *Updated =
      CurDAG->UpdateNodeOperands(N, N->getOperand(0), NewShiftAmt);
  if (Updated == N) {
    // If the original shift amount is now dead, delete it so that we don't
    // run it through isel.
    if (OrigShiftAmt.getNode()->use_empty())
      CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());

    // Now that the shift amount is optimized, defer to normal isel to get
    // load folding and legacy vs BMI2 selection without repeating it here.
    SelectCode(N);
  } else {
    // An existing node was found: replace ourselves with it and wait for it
    // to be selected after its other users.
    ReplaceNode(N, Updated);
  }
  return true;
}
|
||||
|
||||
/// If the high bits of an 'and' operand are known zero, try setting the
|
||||
/// high bits of an 'and' constant operand to produce a smaller encoding by
|
||||
/// creating a small, sign-extended negative immediate rather than a large
|
||||
|
@ -2820,6 +2917,13 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
|
|||
return;
|
||||
}
|
||||
|
||||
case ISD::SRL:
|
||||
case ISD::SRA:
|
||||
case ISD::SHL:
|
||||
if (tryShiftAmountMod(Node))
|
||||
return;
|
||||
break;
|
||||
|
||||
case ISD::AND:
|
||||
if (matchBEXTRFromAnd(Node))
|
||||
return;
|
||||
|
|
|
@ -1108,7 +1108,7 @@ define i32 @clear_lowbits32_ic0(i32 %val, i32 %numlowbits) nounwind {
|
|||
; X86-NOBMI2-LABEL: clear_lowbits32_ic0:
|
||||
; X86-NOBMI2: # %bb.0:
|
||||
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NOBMI2-NEXT: movl $32, %ecx
|
||||
; X86-NOBMI2-NEXT: xorl %ecx, %ecx
|
||||
; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NOBMI2-NEXT: shrl %cl, %eax
|
||||
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
|
@ -1117,7 +1117,7 @@ define i32 @clear_lowbits32_ic0(i32 %val, i32 %numlowbits) nounwind {
|
|||
;
|
||||
; X86-BMI2-LABEL: clear_lowbits32_ic0:
|
||||
; X86-BMI2: # %bb.0:
|
||||
; X86-BMI2-NEXT: movl $32, %eax
|
||||
; X86-BMI2-NEXT: xorl %eax, %eax
|
||||
; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
|
||||
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
|
||||
|
@ -1125,8 +1125,8 @@ define i32 @clear_lowbits32_ic0(i32 %val, i32 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI2-LABEL: clear_lowbits32_ic0:
|
||||
; X64-NOBMI2: # %bb.0:
|
||||
; X64-NOBMI2-NEXT: movl $32, %ecx
|
||||
; X64-NOBMI2-NEXT: subl %esi, %ecx
|
||||
; X64-NOBMI2-NEXT: movl %esi, %ecx
|
||||
; X64-NOBMI2-NEXT: negl %ecx
|
||||
; X64-NOBMI2-NEXT: shrl %cl, %edi
|
||||
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI2-NEXT: shll %cl, %edi
|
||||
|
@ -1135,10 +1135,9 @@ define i32 @clear_lowbits32_ic0(i32 %val, i32 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-BMI2-LABEL: clear_lowbits32_ic0:
|
||||
; X64-BMI2: # %bb.0:
|
||||
; X64-BMI2-NEXT: movl $32, %eax
|
||||
; X64-BMI2-NEXT: subl %esi, %eax
|
||||
; X64-BMI2-NEXT: shrxl %eax, %edi, %ecx
|
||||
; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax
|
||||
; X64-BMI2-NEXT: negl %esi
|
||||
; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
|
||||
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
|
||||
; X64-BMI2-NEXT: retq
|
||||
%numhighbits = sub i32 32, %numlowbits
|
||||
%mask = shl i32 -1, %numhighbits
|
||||
|
@ -1150,15 +1149,16 @@ define i32 @clear_lowbits32_ic1_indexzext(i32 %val, i8 %numlowbits) nounwind {
|
|||
; X86-NOBMI2-LABEL: clear_lowbits32_ic1_indexzext:
|
||||
; X86-NOBMI2: # %bb.0:
|
||||
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NOBMI2-NEXT: movb $32, %cl
|
||||
; X86-NOBMI2-NEXT: xorl %ecx, %ecx
|
||||
; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
|
||||
; X86-NOBMI2-NEXT: shrl %cl, %eax
|
||||
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NOBMI2-NEXT: shll %cl, %eax
|
||||
; X86-NOBMI2-NEXT: retl
|
||||
;
|
||||
; X86-BMI2-LABEL: clear_lowbits32_ic1_indexzext:
|
||||
; X86-BMI2: # %bb.0:
|
||||
; X86-BMI2-NEXT: movb $32, %al
|
||||
; X86-BMI2-NEXT: xorl %eax, %eax
|
||||
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
|
||||
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
|
||||
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
|
||||
|
@ -1166,19 +1166,19 @@ define i32 @clear_lowbits32_ic1_indexzext(i32 %val, i8 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI2-LABEL: clear_lowbits32_ic1_indexzext:
|
||||
; X64-NOBMI2: # %bb.0:
|
||||
; X64-NOBMI2-NEXT: movb $32, %cl
|
||||
; X64-NOBMI2-NEXT: subb %sil, %cl
|
||||
; X64-NOBMI2-NEXT: movl %esi, %ecx
|
||||
; X64-NOBMI2-NEXT: negb %cl
|
||||
; X64-NOBMI2-NEXT: shrl %cl, %edi
|
||||
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI2-NEXT: shll %cl, %edi
|
||||
; X64-NOBMI2-NEXT: movl %edi, %eax
|
||||
; X64-NOBMI2-NEXT: retq
|
||||
;
|
||||
; X64-BMI2-LABEL: clear_lowbits32_ic1_indexzext:
|
||||
; X64-BMI2: # %bb.0:
|
||||
; X64-BMI2-NEXT: movb $32, %al
|
||||
; X64-BMI2-NEXT: subb %sil, %al
|
||||
; X64-BMI2-NEXT: shrxl %eax, %edi, %ecx
|
||||
; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax
|
||||
; X64-BMI2-NEXT: negb %sil
|
||||
; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
|
||||
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
|
||||
; X64-BMI2-NEXT: retq
|
||||
%numhighbits = sub i8 32, %numlowbits
|
||||
%sh_prom = zext i8 %numhighbits to i32
|
||||
|
@ -1192,7 +1192,7 @@ define i32 @clear_lowbits32_ic2_load(i32* %w, i32 %numlowbits) nounwind {
|
|||
; X86-NOBMI2: # %bb.0:
|
||||
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NOBMI2-NEXT: movl (%eax), %eax
|
||||
; X86-NOBMI2-NEXT: movl $32, %ecx
|
||||
; X86-NOBMI2-NEXT: xorl %ecx, %ecx
|
||||
; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NOBMI2-NEXT: shrl %cl, %eax
|
||||
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
|
@ -1202,7 +1202,7 @@ define i32 @clear_lowbits32_ic2_load(i32* %w, i32 %numlowbits) nounwind {
|
|||
; X86-BMI2-LABEL: clear_lowbits32_ic2_load:
|
||||
; X86-BMI2: # %bb.0:
|
||||
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-BMI2-NEXT: movl $32, %ecx
|
||||
; X86-BMI2-NEXT: xorl %ecx, %ecx
|
||||
; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-BMI2-NEXT: shrxl %ecx, (%eax), %eax
|
||||
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
|
||||
|
@ -1210,9 +1210,9 @@ define i32 @clear_lowbits32_ic2_load(i32* %w, i32 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI2-LABEL: clear_lowbits32_ic2_load:
|
||||
; X64-NOBMI2: # %bb.0:
|
||||
; X64-NOBMI2-NEXT: movl %esi, %ecx
|
||||
; X64-NOBMI2-NEXT: movl (%rdi), %eax
|
||||
; X64-NOBMI2-NEXT: movl $32, %ecx
|
||||
; X64-NOBMI2-NEXT: subl %esi, %ecx
|
||||
; X64-NOBMI2-NEXT: negl %ecx
|
||||
; X64-NOBMI2-NEXT: shrl %cl, %eax
|
||||
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI2-NEXT: shll %cl, %eax
|
||||
|
@ -1220,10 +1220,9 @@ define i32 @clear_lowbits32_ic2_load(i32* %w, i32 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-BMI2-LABEL: clear_lowbits32_ic2_load:
|
||||
; X64-BMI2: # %bb.0:
|
||||
; X64-BMI2-NEXT: movl $32, %eax
|
||||
; X64-BMI2-NEXT: subl %esi, %eax
|
||||
; X64-BMI2-NEXT: shrxl %eax, (%rdi), %ecx
|
||||
; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax
|
||||
; X64-BMI2-NEXT: negl %esi
|
||||
; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax
|
||||
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
|
||||
; X64-BMI2-NEXT: retq
|
||||
%val = load i32, i32* %w
|
||||
%numhighbits = sub i32 32, %numlowbits
|
||||
|
@ -1237,16 +1236,17 @@ define i32 @clear_lowbits32_ic3_load_indexzext(i32* %w, i8 %numlowbits) nounwind
|
|||
; X86-NOBMI2: # %bb.0:
|
||||
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NOBMI2-NEXT: movl (%eax), %eax
|
||||
; X86-NOBMI2-NEXT: movb $32, %cl
|
||||
; X86-NOBMI2-NEXT: xorl %ecx, %ecx
|
||||
; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
|
||||
; X86-NOBMI2-NEXT: shrl %cl, %eax
|
||||
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NOBMI2-NEXT: shll %cl, %eax
|
||||
; X86-NOBMI2-NEXT: retl
|
||||
;
|
||||
; X86-BMI2-LABEL: clear_lowbits32_ic3_load_indexzext:
|
||||
; X86-BMI2: # %bb.0:
|
||||
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-BMI2-NEXT: movb $32, %cl
|
||||
; X86-BMI2-NEXT: xorl %ecx, %ecx
|
||||
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
|
||||
; X86-BMI2-NEXT: shrxl %ecx, (%eax), %eax
|
||||
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
|
||||
|
@ -1254,19 +1254,19 @@ define i32 @clear_lowbits32_ic3_load_indexzext(i32* %w, i8 %numlowbits) nounwind
|
|||
;
|
||||
; X64-NOBMI2-LABEL: clear_lowbits32_ic3_load_indexzext:
|
||||
; X64-NOBMI2: # %bb.0:
|
||||
; X64-NOBMI2-NEXT: movl %esi, %ecx
|
||||
; X64-NOBMI2-NEXT: movl (%rdi), %eax
|
||||
; X64-NOBMI2-NEXT: movb $32, %cl
|
||||
; X64-NOBMI2-NEXT: subb %sil, %cl
|
||||
; X64-NOBMI2-NEXT: negb %cl
|
||||
; X64-NOBMI2-NEXT: shrl %cl, %eax
|
||||
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI2-NEXT: shll %cl, %eax
|
||||
; X64-NOBMI2-NEXT: retq
|
||||
;
|
||||
; X64-BMI2-LABEL: clear_lowbits32_ic3_load_indexzext:
|
||||
; X64-BMI2: # %bb.0:
|
||||
; X64-BMI2-NEXT: movb $32, %al
|
||||
; X64-BMI2-NEXT: subb %sil, %al
|
||||
; X64-BMI2-NEXT: shrxl %eax, (%rdi), %ecx
|
||||
; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax
|
||||
; X64-BMI2-NEXT: negb %sil
|
||||
; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax
|
||||
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
|
||||
; X64-BMI2-NEXT: retq
|
||||
%val = load i32, i32* %w
|
||||
%numhighbits = sub i8 32, %numlowbits
|
||||
|
@ -1280,7 +1280,7 @@ define i32 @clear_lowbits32_ic4_commutative(i32 %val, i32 %numlowbits) nounwind
|
|||
; X86-NOBMI2-LABEL: clear_lowbits32_ic4_commutative:
|
||||
; X86-NOBMI2: # %bb.0:
|
||||
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NOBMI2-NEXT: movl $32, %ecx
|
||||
; X86-NOBMI2-NEXT: xorl %ecx, %ecx
|
||||
; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NOBMI2-NEXT: shrl %cl, %eax
|
||||
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
|
@ -1289,7 +1289,7 @@ define i32 @clear_lowbits32_ic4_commutative(i32 %val, i32 %numlowbits) nounwind
|
|||
;
|
||||
; X86-BMI2-LABEL: clear_lowbits32_ic4_commutative:
|
||||
; X86-BMI2: # %bb.0:
|
||||
; X86-BMI2-NEXT: movl $32, %eax
|
||||
; X86-BMI2-NEXT: xorl %eax, %eax
|
||||
; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
|
||||
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
|
||||
|
@ -1297,8 +1297,8 @@ define i32 @clear_lowbits32_ic4_commutative(i32 %val, i32 %numlowbits) nounwind
|
|||
;
|
||||
; X64-NOBMI2-LABEL: clear_lowbits32_ic4_commutative:
|
||||
; X64-NOBMI2: # %bb.0:
|
||||
; X64-NOBMI2-NEXT: movl $32, %ecx
|
||||
; X64-NOBMI2-NEXT: subl %esi, %ecx
|
||||
; X64-NOBMI2-NEXT: movl %esi, %ecx
|
||||
; X64-NOBMI2-NEXT: negl %ecx
|
||||
; X64-NOBMI2-NEXT: shrl %cl, %edi
|
||||
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI2-NEXT: shll %cl, %edi
|
||||
|
@ -1307,10 +1307,9 @@ define i32 @clear_lowbits32_ic4_commutative(i32 %val, i32 %numlowbits) nounwind
|
|||
;
|
||||
; X64-BMI2-LABEL: clear_lowbits32_ic4_commutative:
|
||||
; X64-BMI2: # %bb.0:
|
||||
; X64-BMI2-NEXT: movl $32, %eax
|
||||
; X64-BMI2-NEXT: subl %esi, %eax
|
||||
; X64-BMI2-NEXT: shrxl %eax, %edi, %ecx
|
||||
; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax
|
||||
; X64-BMI2-NEXT: negl %esi
|
||||
; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
|
||||
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
|
||||
; X64-BMI2-NEXT: retq
|
||||
%numhighbits = sub i32 32, %numlowbits
|
||||
%mask = shl i32 -1, %numhighbits
|
||||
|
@ -1358,20 +1357,19 @@ define i64 @clear_lowbits64_ic0(i64 %val, i64 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI2-LABEL: clear_lowbits64_ic0:
|
||||
; X64-NOBMI2: # %bb.0:
|
||||
; X64-NOBMI2-NEXT: movl $64, %ecx
|
||||
; X64-NOBMI2-NEXT: subl %esi, %ecx
|
||||
; X64-NOBMI2-NEXT: movq %rsi, %rcx
|
||||
; X64-NOBMI2-NEXT: negl %ecx
|
||||
; X64-NOBMI2-NEXT: shrq %cl, %rdi
|
||||
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; X64-NOBMI2-NEXT: shlq %cl, %rdi
|
||||
; X64-NOBMI2-NEXT: movq %rdi, %rax
|
||||
; X64-NOBMI2-NEXT: retq
|
||||
;
|
||||
; X64-BMI2-LABEL: clear_lowbits64_ic0:
|
||||
; X64-BMI2: # %bb.0:
|
||||
; X64-BMI2-NEXT: movl $64, %eax
|
||||
; X64-BMI2-NEXT: subl %esi, %eax
|
||||
; X64-BMI2-NEXT: shrxq %rax, %rdi, %rcx
|
||||
; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax
|
||||
; X64-BMI2-NEXT: negl %esi
|
||||
; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
|
||||
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
|
||||
; X64-BMI2-NEXT: retq
|
||||
%numhighbits = sub i64 64, %numlowbits
|
||||
%mask = shl i64 -1, %numhighbits
|
||||
|
@ -1417,19 +1415,20 @@ define i64 @clear_lowbits64_ic1_indexzext(i64 %val, i8 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI2-LABEL: clear_lowbits64_ic1_indexzext:
|
||||
; X64-NOBMI2: # %bb.0:
|
||||
; X64-NOBMI2-NEXT: movb $64, %cl
|
||||
; X64-NOBMI2-NEXT: subb %sil, %cl
|
||||
; X64-NOBMI2-NEXT: movl %esi, %ecx
|
||||
; X64-NOBMI2-NEXT: negb %cl
|
||||
; X64-NOBMI2-NEXT: shrq %cl, %rdi
|
||||
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI2-NEXT: shlq %cl, %rdi
|
||||
; X64-NOBMI2-NEXT: movq %rdi, %rax
|
||||
; X64-NOBMI2-NEXT: retq
|
||||
;
|
||||
; X64-BMI2-LABEL: clear_lowbits64_ic1_indexzext:
|
||||
; X64-BMI2: # %bb.0:
|
||||
; X64-BMI2-NEXT: movb $64, %al
|
||||
; X64-BMI2-NEXT: subb %sil, %al
|
||||
; X64-BMI2-NEXT: shrxq %rax, %rdi, %rcx
|
||||
; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax
|
||||
; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi
|
||||
; X64-BMI2-NEXT: negb %sil
|
||||
; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
|
||||
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
|
||||
; X64-BMI2-NEXT: retq
|
||||
%numhighbits = sub i8 64, %numlowbits
|
||||
%sh_prom = zext i8 %numhighbits to i64
|
||||
|
@ -1482,20 +1481,19 @@ define i64 @clear_lowbits64_ic2_load(i64* %w, i64 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI2-LABEL: clear_lowbits64_ic2_load:
|
||||
; X64-NOBMI2: # %bb.0:
|
||||
; X64-NOBMI2-NEXT: movq %rsi, %rcx
|
||||
; X64-NOBMI2-NEXT: movq (%rdi), %rax
|
||||
; X64-NOBMI2-NEXT: movl $64, %ecx
|
||||
; X64-NOBMI2-NEXT: subl %esi, %ecx
|
||||
; X64-NOBMI2-NEXT: negl %ecx
|
||||
; X64-NOBMI2-NEXT: shrq %cl, %rax
|
||||
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; X64-NOBMI2-NEXT: shlq %cl, %rax
|
||||
; X64-NOBMI2-NEXT: retq
|
||||
;
|
||||
; X64-BMI2-LABEL: clear_lowbits64_ic2_load:
|
||||
; X64-BMI2: # %bb.0:
|
||||
; X64-BMI2-NEXT: movl $64, %eax
|
||||
; X64-BMI2-NEXT: subl %esi, %eax
|
||||
; X64-BMI2-NEXT: shrxq %rax, (%rdi), %rcx
|
||||
; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax
|
||||
; X64-BMI2-NEXT: negl %esi
|
||||
; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax
|
||||
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
|
||||
; X64-BMI2-NEXT: retq
|
||||
%val = load i64, i64* %w
|
||||
%numhighbits = sub i64 64, %numlowbits
|
||||
|
@ -1548,19 +1546,20 @@ define i64 @clear_lowbits64_ic3_load_indexzext(i64* %w, i8 %numlowbits) nounwind
|
|||
;
|
||||
; X64-NOBMI2-LABEL: clear_lowbits64_ic3_load_indexzext:
|
||||
; X64-NOBMI2: # %bb.0:
|
||||
; X64-NOBMI2-NEXT: movl %esi, %ecx
|
||||
; X64-NOBMI2-NEXT: movq (%rdi), %rax
|
||||
; X64-NOBMI2-NEXT: movb $64, %cl
|
||||
; X64-NOBMI2-NEXT: subb %sil, %cl
|
||||
; X64-NOBMI2-NEXT: negb %cl
|
||||
; X64-NOBMI2-NEXT: shrq %cl, %rax
|
||||
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI2-NEXT: shlq %cl, %rax
|
||||
; X64-NOBMI2-NEXT: retq
|
||||
;
|
||||
; X64-BMI2-LABEL: clear_lowbits64_ic3_load_indexzext:
|
||||
; X64-BMI2: # %bb.0:
|
||||
; X64-BMI2-NEXT: movb $64, %al
|
||||
; X64-BMI2-NEXT: subb %sil, %al
|
||||
; X64-BMI2-NEXT: shrxq %rax, (%rdi), %rcx
|
||||
; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax
|
||||
; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi
|
||||
; X64-BMI2-NEXT: negb %sil
|
||||
; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax
|
||||
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
|
||||
; X64-BMI2-NEXT: retq
|
||||
%val = load i64, i64* %w
|
||||
%numhighbits = sub i8 64, %numlowbits
|
||||
|
@ -1608,20 +1607,19 @@ define i64 @clear_lowbits64_ic4_commutative(i64 %val, i64 %numlowbits) nounwind
|
|||
;
|
||||
; X64-NOBMI2-LABEL: clear_lowbits64_ic4_commutative:
|
||||
; X64-NOBMI2: # %bb.0:
|
||||
; X64-NOBMI2-NEXT: movl $64, %ecx
|
||||
; X64-NOBMI2-NEXT: subl %esi, %ecx
|
||||
; X64-NOBMI2-NEXT: movq %rsi, %rcx
|
||||
; X64-NOBMI2-NEXT: negl %ecx
|
||||
; X64-NOBMI2-NEXT: shrq %cl, %rdi
|
||||
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; X64-NOBMI2-NEXT: shlq %cl, %rdi
|
||||
; X64-NOBMI2-NEXT: movq %rdi, %rax
|
||||
; X64-NOBMI2-NEXT: retq
|
||||
;
|
||||
; X64-BMI2-LABEL: clear_lowbits64_ic4_commutative:
|
||||
; X64-BMI2: # %bb.0:
|
||||
; X64-BMI2-NEXT: movl $64, %eax
|
||||
; X64-BMI2-NEXT: subl %esi, %eax
|
||||
; X64-BMI2-NEXT: shrxq %rax, %rdi, %rcx
|
||||
; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax
|
||||
; X64-BMI2-NEXT: negl %esi
|
||||
; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
|
||||
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
|
||||
; X64-BMI2-NEXT: retq
|
||||
%numhighbits = sub i64 64, %numlowbits
|
||||
%mask = shl i64 -1, %numhighbits
|
||||
|
|
|
@ -1016,7 +1016,7 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
|
|||
; X86-NOBMI-LABEL: bzhi32_c0:
|
||||
; X86-NOBMI: # %bb.0:
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NOBMI-NEXT: movl $32, %ecx
|
||||
; X86-NOBMI-NEXT: xorl %ecx, %ecx
|
||||
; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NOBMI-NEXT: shll %cl, %eax
|
||||
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
|
@ -1031,8 +1031,8 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI-LABEL: bzhi32_c0:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movl $32, %ecx
|
||||
; X64-NOBMI-NEXT: subl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: movl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: negl %ecx
|
||||
; X64-NOBMI-NEXT: shll %cl, %edi
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI-NEXT: shrl %cl, %edi
|
||||
|
@ -1053,9 +1053,10 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
|
|||
; X86-NOBMI-LABEL: bzhi32_c1_indexzext:
|
||||
; X86-NOBMI: # %bb.0:
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NOBMI-NEXT: movb $32, %cl
|
||||
; X86-NOBMI-NEXT: xorl %ecx, %ecx
|
||||
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
|
||||
; X86-NOBMI-NEXT: shll %cl, %eax
|
||||
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NOBMI-NEXT: shrl %cl, %eax
|
||||
; X86-NOBMI-NEXT: retl
|
||||
;
|
||||
|
@ -1067,9 +1068,10 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI-LABEL: bzhi32_c1_indexzext:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movb $32, %cl
|
||||
; X64-NOBMI-NEXT: subb %sil, %cl
|
||||
; X64-NOBMI-NEXT: movl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: negb %cl
|
||||
; X64-NOBMI-NEXT: shll %cl, %edi
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI-NEXT: shrl %cl, %edi
|
||||
; X64-NOBMI-NEXT: movl %edi, %eax
|
||||
; X64-NOBMI-NEXT: retq
|
||||
|
@ -1090,7 +1092,7 @@ define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits) nounwind {
|
|||
; X86-NOBMI: # %bb.0:
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NOBMI-NEXT: movl (%eax), %eax
|
||||
; X86-NOBMI-NEXT: movl $32, %ecx
|
||||
; X86-NOBMI-NEXT: xorl %ecx, %ecx
|
||||
; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NOBMI-NEXT: shll %cl, %eax
|
||||
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
|
@ -1106,9 +1108,9 @@ define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI-LABEL: bzhi32_c2_load:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: movl (%rdi), %eax
|
||||
; X64-NOBMI-NEXT: movl $32, %ecx
|
||||
; X64-NOBMI-NEXT: subl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: negl %ecx
|
||||
; X64-NOBMI-NEXT: shll %cl, %eax
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI-NEXT: shrl %cl, %eax
|
||||
|
@ -1130,9 +1132,10 @@ define i32 @bzhi32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
|
|||
; X86-NOBMI: # %bb.0:
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NOBMI-NEXT: movl (%eax), %eax
|
||||
; X86-NOBMI-NEXT: movb $32, %cl
|
||||
; X86-NOBMI-NEXT: xorl %ecx, %ecx
|
||||
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
|
||||
; X86-NOBMI-NEXT: shll %cl, %eax
|
||||
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NOBMI-NEXT: shrl %cl, %eax
|
||||
; X86-NOBMI-NEXT: retl
|
||||
;
|
||||
|
@ -1145,10 +1148,11 @@ define i32 @bzhi32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: movl (%rdi), %eax
|
||||
; X64-NOBMI-NEXT: movb $32, %cl
|
||||
; X64-NOBMI-NEXT: subb %sil, %cl
|
||||
; X64-NOBMI-NEXT: negb %cl
|
||||
; X64-NOBMI-NEXT: shll %cl, %eax
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI-NEXT: shrl %cl, %eax
|
||||
; X64-NOBMI-NEXT: retq
|
||||
;
|
||||
|
@ -1168,7 +1172,7 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
|
|||
; X86-NOBMI-LABEL: bzhi32_c4_commutative:
|
||||
; X86-NOBMI: # %bb.0:
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NOBMI-NEXT: movl $32, %ecx
|
||||
; X86-NOBMI-NEXT: xorl %ecx, %ecx
|
||||
; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NOBMI-NEXT: shll %cl, %eax
|
||||
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
|
@ -1183,8 +1187,8 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI-LABEL: bzhi32_c4_commutative:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movl $32, %ecx
|
||||
; X64-NOBMI-NEXT: subl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: movl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: negl %ecx
|
||||
; X64-NOBMI-NEXT: shll %cl, %edi
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI-NEXT: shrl %cl, %edi
|
||||
|
@ -1241,10 +1245,10 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI-LABEL: bzhi64_c0:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movl $64, %ecx
|
||||
; X64-NOBMI-NEXT: subl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: movq %rsi, %rcx
|
||||
; X64-NOBMI-NEXT: negl %ecx
|
||||
; X64-NOBMI-NEXT: shlq %cl, %rdi
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; X64-NOBMI-NEXT: shrq %cl, %rdi
|
||||
; X64-NOBMI-NEXT: movq %rdi, %rax
|
||||
; X64-NOBMI-NEXT: retq
|
||||
|
@ -1297,9 +1301,10 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI-LABEL: bzhi64_c1_indexzext:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movb $64, %cl
|
||||
; X64-NOBMI-NEXT: subb %sil, %cl
|
||||
; X64-NOBMI-NEXT: movl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: negb %cl
|
||||
; X64-NOBMI-NEXT: shlq %cl, %rdi
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI-NEXT: shrq %cl, %rdi
|
||||
; X64-NOBMI-NEXT: movq %rdi, %rax
|
||||
; X64-NOBMI-NEXT: retq
|
||||
|
@ -1360,11 +1365,11 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI-LABEL: bzhi64_c2_load:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movq %rsi, %rcx
|
||||
; X64-NOBMI-NEXT: movq (%rdi), %rax
|
||||
; X64-NOBMI-NEXT: movl $64, %ecx
|
||||
; X64-NOBMI-NEXT: subl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: negl %ecx
|
||||
; X64-NOBMI-NEXT: shlq %cl, %rax
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; X64-NOBMI-NEXT: shrq %cl, %rax
|
||||
; X64-NOBMI-NEXT: retq
|
||||
;
|
||||
|
@ -1423,10 +1428,11 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI-LABEL: bzhi64_c3_load_indexzext:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: movq (%rdi), %rax
|
||||
; X64-NOBMI-NEXT: movb $64, %cl
|
||||
; X64-NOBMI-NEXT: subb %sil, %cl
|
||||
; X64-NOBMI-NEXT: negb %cl
|
||||
; X64-NOBMI-NEXT: shlq %cl, %rax
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI-NEXT: shrq %cl, %rax
|
||||
; X64-NOBMI-NEXT: retq
|
||||
;
|
||||
|
@ -1481,10 +1487,10 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI-LABEL: bzhi64_c4_commutative:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movl $64, %ecx
|
||||
; X64-NOBMI-NEXT: subl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: movq %rsi, %rcx
|
||||
; X64-NOBMI-NEXT: negl %ecx
|
||||
; X64-NOBMI-NEXT: shlq %cl, %rdi
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; X64-NOBMI-NEXT: shrq %cl, %rdi
|
||||
; X64-NOBMI-NEXT: movq %rdi, %rax
|
||||
; X64-NOBMI-NEXT: retq
|
||||
|
@ -1507,7 +1513,7 @@ define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
|
|||
; X86-NOBMI-LABEL: bzhi32_d0:
|
||||
; X86-NOBMI: # %bb.0:
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NOBMI-NEXT: movl $32, %ecx
|
||||
; X86-NOBMI-NEXT: xorl %ecx, %ecx
|
||||
; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NOBMI-NEXT: shll %cl, %eax
|
||||
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
|
@ -1522,8 +1528,8 @@ define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI-LABEL: bzhi32_d0:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movl $32, %ecx
|
||||
; X64-NOBMI-NEXT: subl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: movl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: negl %ecx
|
||||
; X64-NOBMI-NEXT: shll %cl, %edi
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI-NEXT: shrl %cl, %edi
|
||||
|
@ -1544,9 +1550,10 @@ define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
|
|||
; X86-NOBMI-LABEL: bzhi32_d1_indexzext:
|
||||
; X86-NOBMI: # %bb.0:
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NOBMI-NEXT: movb $32, %cl
|
||||
; X86-NOBMI-NEXT: xorl %ecx, %ecx
|
||||
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
|
||||
; X86-NOBMI-NEXT: shll %cl, %eax
|
||||
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NOBMI-NEXT: shrl %cl, %eax
|
||||
; X86-NOBMI-NEXT: retl
|
||||
;
|
||||
|
@ -1558,9 +1565,10 @@ define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI-LABEL: bzhi32_d1_indexzext:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movb $32, %cl
|
||||
; X64-NOBMI-NEXT: subb %sil, %cl
|
||||
; X64-NOBMI-NEXT: movl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: negb %cl
|
||||
; X64-NOBMI-NEXT: shll %cl, %edi
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI-NEXT: shrl %cl, %edi
|
||||
; X64-NOBMI-NEXT: movl %edi, %eax
|
||||
; X64-NOBMI-NEXT: retq
|
||||
|
@ -1581,7 +1589,7 @@ define i32 @bzhi32_d2_load(i32* %w, i32 %numlowbits) nounwind {
|
|||
; X86-NOBMI: # %bb.0:
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NOBMI-NEXT: movl (%eax), %eax
|
||||
; X86-NOBMI-NEXT: movl $32, %ecx
|
||||
; X86-NOBMI-NEXT: xorl %ecx, %ecx
|
||||
; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NOBMI-NEXT: shll %cl, %eax
|
||||
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
|
@ -1597,9 +1605,9 @@ define i32 @bzhi32_d2_load(i32* %w, i32 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI-LABEL: bzhi32_d2_load:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: movl (%rdi), %eax
|
||||
; X64-NOBMI-NEXT: movl $32, %ecx
|
||||
; X64-NOBMI-NEXT: subl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: negl %ecx
|
||||
; X64-NOBMI-NEXT: shll %cl, %eax
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI-NEXT: shrl %cl, %eax
|
||||
|
@ -1621,9 +1629,10 @@ define i32 @bzhi32_d3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
|
|||
; X86-NOBMI: # %bb.0:
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NOBMI-NEXT: movl (%eax), %eax
|
||||
; X86-NOBMI-NEXT: movb $32, %cl
|
||||
; X86-NOBMI-NEXT: xorl %ecx, %ecx
|
||||
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
|
||||
; X86-NOBMI-NEXT: shll %cl, %eax
|
||||
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NOBMI-NEXT: shrl %cl, %eax
|
||||
; X86-NOBMI-NEXT: retl
|
||||
;
|
||||
|
@ -1636,10 +1645,11 @@ define i32 @bzhi32_d3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: movl (%rdi), %eax
|
||||
; X64-NOBMI-NEXT: movb $32, %cl
|
||||
; X64-NOBMI-NEXT: subb %sil, %cl
|
||||
; X64-NOBMI-NEXT: negb %cl
|
||||
; X64-NOBMI-NEXT: shll %cl, %eax
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI-NEXT: shrl %cl, %eax
|
||||
; X64-NOBMI-NEXT: retq
|
||||
;
|
||||
|
@ -1731,10 +1741,10 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI-LABEL: bzhi64_d0:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movl $64, %ecx
|
||||
; X64-NOBMI-NEXT: subl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: movq %rsi, %rcx
|
||||
; X64-NOBMI-NEXT: negl %ecx
|
||||
; X64-NOBMI-NEXT: shlq %cl, %rdi
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; X64-NOBMI-NEXT: shrq %cl, %rdi
|
||||
; X64-NOBMI-NEXT: movq %rdi, %rax
|
||||
; X64-NOBMI-NEXT: retq
|
||||
|
@ -1823,9 +1833,10 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI-LABEL: bzhi64_d1_indexzext:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movb $64, %cl
|
||||
; X64-NOBMI-NEXT: subb %sil, %cl
|
||||
; X64-NOBMI-NEXT: movl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: negb %cl
|
||||
; X64-NOBMI-NEXT: shlq %cl, %rdi
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI-NEXT: shrq %cl, %rdi
|
||||
; X64-NOBMI-NEXT: movq %rdi, %rax
|
||||
; X64-NOBMI-NEXT: retq
|
||||
|
@ -1918,11 +1929,11 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI-LABEL: bzhi64_d2_load:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movq %rsi, %rcx
|
||||
; X64-NOBMI-NEXT: movq (%rdi), %rax
|
||||
; X64-NOBMI-NEXT: movl $64, %ecx
|
||||
; X64-NOBMI-NEXT: subl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: negl %ecx
|
||||
; X64-NOBMI-NEXT: shlq %cl, %rax
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; X64-NOBMI-NEXT: shrq %cl, %rax
|
||||
; X64-NOBMI-NEXT: retq
|
||||
;
|
||||
|
@ -2013,10 +2024,11 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
|
|||
;
|
||||
; X64-NOBMI-LABEL: bzhi64_d3_load_indexzext:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movl %esi, %ecx
|
||||
; X64-NOBMI-NEXT: movq (%rdi), %rax
|
||||
; X64-NOBMI-NEXT: movb $64, %cl
|
||||
; X64-NOBMI-NEXT: subb %sil, %cl
|
||||
; X64-NOBMI-NEXT: negb %cl
|
||||
; X64-NOBMI-NEXT: shlq %cl, %rax
|
||||
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X64-NOBMI-NEXT: shrq %cl, %rax
|
||||
; X64-NOBMI-NEXT: retq
|
||||
;
|
||||
|
|
|
@ -162,11 +162,10 @@ define i64 @lshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone {
|
|||
;
|
||||
; BTVER2-LABEL: lshift_cl:
|
||||
; BTVER2: # %bb.0: # %entry
|
||||
; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50]
|
||||
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
|
||||
; BTVER2-NEXT: shlq %cl, %rdi # sched: [1:0.50]
|
||||
; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50]
|
||||
; BTVER2-NEXT: subl %edx, %ecx # sched: [1:0.50]
|
||||
; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; BTVER2-NEXT: negl %ecx # sched: [1:0.50]
|
||||
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; BTVER2-NEXT: shrq %cl, %rsi # sched: [1:0.50]
|
||||
; BTVER2-NEXT: orq %rdi, %rsi # sched: [1:0.50]
|
||||
; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50]
|
||||
|
@ -174,11 +173,10 @@ define i64 @lshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone {
|
|||
;
|
||||
; BDVER1-LABEL: lshift_cl:
|
||||
; BDVER1: # %bb.0: # %entry
|
||||
; BDVER1-NEXT: movl %edx, %ecx
|
||||
; BDVER1-NEXT: movq %rdx, %rcx
|
||||
; BDVER1-NEXT: shlq %cl, %rdi
|
||||
; BDVER1-NEXT: movl $64, %ecx
|
||||
; BDVER1-NEXT: subl %edx, %ecx
|
||||
; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; BDVER1-NEXT: negl %ecx
|
||||
; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; BDVER1-NEXT: shrq %cl, %rsi
|
||||
; BDVER1-NEXT: orq %rdi, %rsi
|
||||
; BDVER1-NEXT: movq %rsi, %rax
|
||||
|
@ -236,11 +234,10 @@ define i64 @rshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone {
|
|||
;
|
||||
; BTVER2-LABEL: rshift_cl:
|
||||
; BTVER2: # %bb.0: # %entry
|
||||
; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50]
|
||||
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
|
||||
; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50]
|
||||
; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50]
|
||||
; BTVER2-NEXT: subl %edx, %ecx # sched: [1:0.50]
|
||||
; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; BTVER2-NEXT: negl %ecx # sched: [1:0.50]
|
||||
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; BTVER2-NEXT: shlq %cl, %rsi # sched: [1:0.50]
|
||||
; BTVER2-NEXT: orq %rdi, %rsi # sched: [1:0.50]
|
||||
; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50]
|
||||
|
@ -248,11 +245,10 @@ define i64 @rshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone {
|
|||
;
|
||||
; BDVER1-LABEL: rshift_cl:
|
||||
; BDVER1: # %bb.0: # %entry
|
||||
; BDVER1-NEXT: movl %edx, %ecx
|
||||
; BDVER1-NEXT: movq %rdx, %rcx
|
||||
; BDVER1-NEXT: shrq %cl, %rdi
|
||||
; BDVER1-NEXT: movl $64, %ecx
|
||||
; BDVER1-NEXT: subl %edx, %ecx
|
||||
; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; BDVER1-NEXT: negl %ecx
|
||||
; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; BDVER1-NEXT: shlq %cl, %rsi
|
||||
; BDVER1-NEXT: orq %rdi, %rsi
|
||||
; BDVER1-NEXT: movq %rsi, %rax
|
||||
|
@ -310,11 +306,10 @@ define void @lshift_mem_cl(i64 %a, i64 %c) nounwind readnone {
|
|||
; BTVER2-LABEL: lshift_mem_cl:
|
||||
; BTVER2: # %bb.0: # %entry
|
||||
; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00]
|
||||
; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.50]
|
||||
; BTVER2-NEXT: movq %rsi, %rcx # sched: [1:0.50]
|
||||
; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50]
|
||||
; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50]
|
||||
; BTVER2-NEXT: subl %esi, %ecx # sched: [1:0.50]
|
||||
; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; BTVER2-NEXT: negl %ecx # sched: [1:0.50]
|
||||
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50]
|
||||
; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50]
|
||||
; BTVER2-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
|
||||
|
@ -322,12 +317,11 @@ define void @lshift_mem_cl(i64 %a, i64 %c) nounwind readnone {
|
|||
;
|
||||
; BDVER1-LABEL: lshift_mem_cl:
|
||||
; BDVER1: # %bb.0: # %entry
|
||||
; BDVER1-NEXT: movq %rsi, %rcx
|
||||
; BDVER1-NEXT: movq {{.*}}(%rip), %rax
|
||||
; BDVER1-NEXT: movl %esi, %ecx
|
||||
; BDVER1-NEXT: shlq %cl, %rax
|
||||
; BDVER1-NEXT: movl $64, %ecx
|
||||
; BDVER1-NEXT: subl %esi, %ecx
|
||||
; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; BDVER1-NEXT: negl %ecx
|
||||
; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; BDVER1-NEXT: shrq %cl, %rdi
|
||||
; BDVER1-NEXT: orq %rax, %rdi
|
||||
; BDVER1-NEXT: movq %rdi, {{.*}}(%rip)
|
||||
|
|
Loading…
Reference in New Issue