[VE] Implement FoldImmediate

Implement FoldImmediate for integer arithmetic operations only.
Add regression tests also.

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D91150
This commit is contained in:
Kazushi (Jam) Marukawa 2020-11-03 22:08:57 +09:00
parent c8d73d939f
commit dd6f607ea8
16 changed files with 628 additions and 130 deletions

View File

@ -349,6 +349,24 @@ inline static bool isMImm32Val(uint32_t Val) {
return (Val & (1 << 31)) && isShiftedMask_32(Val);
}
/// val2MImm - Convert an integer immediate value to target MImm immediate.
///
/// The encoding produced here is:
///   - 0 for the value 0, written (0)1;
///   - the number of leading one bits when bit 63 of \p Val is set, written
///     (m)1;
///   - the number of leading zero bits with bit 0x40 set otherwise, written
///     (m)0.
/// NOTE(review): the result is presumably only meaningful for values that are
/// representable as an MImm (see isMImmVal) - confirm against callers.
inline static uint64_t val2MImm(uint64_t Val) {
  if (Val == 0)
    return 0; // (0)1
  // Build the sign-bit mask from a 64-bit type. `1UL` is only 32 bits wide on
  // hosts where `unsigned long` is 32 bits, and shifting it by 63 there is
  // undefined behavior.
  if (Val & (static_cast<uint64_t>(1) << 63))
    return countLeadingOnes(Val); // (m)1
  return countLeadingZeros(Val) | 0x40; // (m)0
}
/// mimm2Val - Convert a target MImm immediate to an integer immediate value.
///
/// This is the inverse of val2MImm. With m = Val & 0x3f:
///   - bit 0x40 clear selects the (m)1 form: m leading one bits followed by
///     zero bits ((0)1 is 0);
///   - bit 0x40 set selects the (m)0 form: m leading zero bits followed by
///     one bits ((0)0 is all ones).
/// Only unsigned shifts are used so the result does not depend on the width
/// of `long` or on how the host shifts negative values.
inline static uint64_t mimm2Val(uint64_t Val) {
  const unsigned M = Val & 0x3f;
  const uint64_t AllOnes = ~static_cast<uint64_t>(0);
  if ((Val & 0x40) == 0) {
    // (m)1: exactly M leading ones. M == 0 is handled separately because a
    // shift by 64 is undefined.
    return M == 0 ? 0 : (AllOnes << (64 - M));
  }
  return AllOnes >> M; // (m)0
}
/// M0 - Return \p Val with 64 added. NOTE(review): presumably the operand
/// encoding of the (Val)0 mimm form, matching the 0x40 flag used by
/// val2MImm - confirm.
inline unsigned M0(unsigned Val) {
  const unsigned M0Flag = 0x40;
  return M0Flag + Val;
}
/// M1 - Return \p Val unchanged. NOTE(review): presumably the operand
/// encoding of the (Val)1 mimm form - confirm.
inline unsigned M1(unsigned Val) {
  const unsigned Encoded = Val;
  return Encoded;
}

View File

@ -113,15 +113,6 @@ inline static uint64_t getFpImmVal(const ConstantFPSDNode *N) {
return Val;
}
/// convMImmVal - Convert a mimm integer immediate value to target immediate.
///
/// Returns 0 for the value 0 ((0)1), the number of leading one bits when
/// bit 63 is set ((m)1), and otherwise the number of leading zero bits with
/// bit 0x40 set ((m)0).
inline static uint64_t convMImmVal(uint64_t Val) {
  if (Val == 0)
    return 0; // (0)1
  // Use a 64-bit constant for the sign-bit mask; `1UL << 63` is undefined
  // where `unsigned long` is only 32 bits wide.
  if (Val & (static_cast<uint64_t>(1) << 63))
    return countLeadingOnes(Val); // (m)1
  return countLeadingZeros(Val) | 0x40; // (m)0
}
//===--------------------------------------------------------------------===//
/// VEDAGToDAGISel - VE specific code to select VE machine
/// instructions for SelectionDAG operations.

View File

@ -501,6 +501,184 @@ void VEInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
report_fatal_error("Can't load this register from stack slot");
}
/// Try to fold the immediate materialized by \p DefMI into \p UseMI.
///
/// \p DefMI must be an ORim, or an LEAzii whose displacement is an immediate;
/// any other defining instruction is rejected. \p UseMI must be one of the
/// register-register integer instructions listed below and must use \p Reg as
/// its first or second source operand. On success \p UseMI is rewritten in
/// place to its simm7 (ri/ir) or mimm (rm) form, and \p DefMI is erased when
/// \p Reg has no other non-debug use.
///
/// \returns true if \p UseMI was modified, false if nothing was changed.
bool VEInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                Register Reg, MachineRegisterInfo *MRI) const {
  LLVM_DEBUG(dbgs() << "FoldImmediate\n");

  LLVM_DEBUG(dbgs() << "checking DefMI\n");
  int64_t ImmVal;
  switch (DefMI.getOpcode()) {
  default:
    return false;
  case VE::ORim:
    // General move small immediate instruction on VE.
    LLVM_DEBUG(dbgs() << "checking ORim\n");
    LLVM_DEBUG(DefMI.dump());
    // FIXME: We may need to support FPImm too.
    assert(DefMI.getOperand(1).isImm());
    assert(DefMI.getOperand(2).isImm());
    // ORim produces the bitwise OR of its simm7 and mimm operands, so the
    // folded value must be combined with `|`. Addition only agrees when the
    // two immediates share no set bits.
    ImmVal =
        DefMI.getOperand(1).getImm() | mimm2Val(DefMI.getOperand(2).getImm());
    LLVM_DEBUG(dbgs() << "ImmVal is " << ImmVal << "\n");
    break;
  case VE::LEAzii:
    // General move immediate instruction on VE.
    LLVM_DEBUG(dbgs() << "checking LEAzii\n");
    LLVM_DEBUG(DefMI.dump());
    // FIXME: We may need to support FPImm too.
    assert(DefMI.getOperand(2).isImm());
    if (!DefMI.getOperand(3).isImm())
      // LEAzii may refer label
      return false;
    ImmVal = DefMI.getOperand(2).getImm() + DefMI.getOperand(3).getImm();
    LLVM_DEBUG(dbgs() << "ImmVal is " << ImmVal << "\n");
    break;
  }

  // Try to fold like below:
  //   %1:i64 = ORim 0, 0(1)
  //   %2:i64 = CMPSLrr %0, %1
  // To
  //   %2:i64 = CMPSLrm %0, 0(1)
  //
  // Another example:
  //   %1:i64 = ORim 6, 0(1)
  //   %2:i64 = CMPSLrr %1, %0
  // To
  //   %2:i64 = CMPSLir 6, %0
  //
  // Support commutable instructions like below:
  //   %1:i64 = ORim 6, 0(1)
  //   %2:i64 = ADDSLrr %1, %0
  // To
  //   %2:i64 = ADDSLri %0, 6
  //
  // FIXME: Need to support i32.  Current implementation requires
  //        EXTRACT_SUBREG, so input has following COPY and it avoids folding:
  //   %1:i64 = ORim 6, 0(1)
  //   %2:i32 = COPY %1.sub_i32
  //   %3:i32 = ADDSWSXrr %0, %2
  // FIXME: Need to support shift, cmov, and more instructions.
  // FIXME: Need to support lvl too, but LVLGen runs after peephole-opt.

  LLVM_DEBUG(dbgs() << "checking UseMI\n");
  LLVM_DEBUG(UseMI.dump());
  unsigned NewUseOpcSImm7;
  unsigned NewUseOpcMImm;
  enum InstType {
    rr2ri_rm, // rr -> ri or rm, commutable
    rr2ir_rm, // rr -> ir or rm
  } InstType;

  using namespace llvm::VE;
#define INSTRKIND(NAME)                                                        \
  case NAME##rr:                                                               \
    NewUseOpcSImm7 = NAME##ri;                                                 \
    NewUseOpcMImm = NAME##rm;                                                  \
    InstType = rr2ri_rm;                                                       \
    break
#define NCINSTRKIND(NAME)                                                      \
  case NAME##rr:                                                               \
    NewUseOpcSImm7 = NAME##ir;                                                 \
    NewUseOpcMImm = NAME##rm;                                                  \
    InstType = rr2ir_rm;                                                       \
    break

  switch (UseMI.getOpcode()) {
  default:
    return false;

    INSTRKIND(ADDUL);
    INSTRKIND(ADDSWSX);
    INSTRKIND(ADDSWZX);
    INSTRKIND(ADDSL);
    NCINSTRKIND(SUBUL);
    NCINSTRKIND(SUBSWSX);
    NCINSTRKIND(SUBSWZX);
    NCINSTRKIND(SUBSL);
    INSTRKIND(MULUL);
    INSTRKIND(MULSWSX);
    INSTRKIND(MULSWZX);
    INSTRKIND(MULSL);
    NCINSTRKIND(DIVUL);
    NCINSTRKIND(DIVSWSX);
    NCINSTRKIND(DIVSWZX);
    NCINSTRKIND(DIVSL);
    NCINSTRKIND(CMPUL);
    NCINSTRKIND(CMPSWSX);
    NCINSTRKIND(CMPSWZX);
    NCINSTRKIND(CMPSL);
    INSTRKIND(MAXSWSX);
    INSTRKIND(MAXSWZX);
    INSTRKIND(MAXSL);
    INSTRKIND(MINSWSX);
    INSTRKIND(MINSWZX);
    INSTRKIND(MINSL);
    INSTRKIND(AND);
    INSTRKIND(OR);
    INSTRKIND(XOR);
    INSTRKIND(EQV);
    NCINSTRKIND(NND);
    NCINSTRKIND(MRG);
  }

#undef INSTRKIND
#undef NCINSTRKIND

  unsigned NewUseOpc;
  unsigned UseIdx;
  bool Commute = false;
  LLVM_DEBUG(dbgs() << "checking UseMI operands\n");
  switch (InstType) {
  case rr2ri_rm:
    // Commutable: the immediate always ends up in operand 2.
    UseIdx = 2;
    if (UseMI.getOperand(1).getReg() == Reg) {
      Commute = true;
    } else if (UseMI.getOperand(2).getReg() != Reg) {
      // Reg is used by some operand other than the two sources handled here.
      // Bail out instead of overwriting an unrelated operand when assertions
      // are disabled.
      return false;
    }
    if (isInt<7>(ImmVal)) {
      // This ImmVal matches to SImm7 slot, so change UseOpc to an instruction
      // holds a simm7 slot.
      NewUseOpc = NewUseOpcSImm7;
    } else if (isMImmVal(ImmVal)) {
      // Similarly, change UseOpc to an instruction holds a mimm slot.
      NewUseOpc = NewUseOpcMImm;
      ImmVal = val2MImm(ImmVal);
    } else
      return false;
    break;
  case rr2ir_rm:
    // Not commutable: operand 1 can only take a simm7, operand 2 only a mimm.
    if (UseMI.getOperand(1).getReg() == Reg) {
      // Check immediate value whether it matches to the UseMI instruction.
      if (!isInt<7>(ImmVal))
        return false;
      NewUseOpc = NewUseOpcSImm7;
      UseIdx = 1;
    } else if (UseMI.getOperand(2).getReg() == Reg) {
      // Check immediate value whether it matches to the UseMI instruction.
      if (!isMImmVal(ImmVal))
        return false;
      NewUseOpc = NewUseOpcMImm;
      ImmVal = val2MImm(ImmVal);
      UseIdx = 2;
    } else {
      // Reg is used by some operand other than the two sources handled here;
      // nothing can be folded safely.
      return false;
    }
    break;
  }

  LLVM_DEBUG(dbgs() << "modifying UseMI\n");
  // Query the use count before UseMI stops referring to Reg.
  bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
  UseMI.setDesc(get(NewUseOpc));
  if (Commute) {
    // Move the remaining register operand into slot 1 before slot UseIdx is
    // turned into the immediate.
    UseMI.getOperand(1).setReg(UseMI.getOperand(UseIdx).getReg());
  }
  UseMI.getOperand(UseIdx).ChangeToImmediate(ImmVal);
  if (DeleteDef)
    DefMI.eraseFromParent();

  return true;
}
Register VEInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
VEMachineFunctionInfo *VEFI = MF->getInfo<VEMachineFunctionInfo>();
Register GlobalBaseReg = VEFI->getGlobalBaseReg();

View File

@ -100,6 +100,13 @@ public:
const TargetRegisterInfo *TRI) const override;
/// } Stack Spill & Reload
/// Optimization {
/// Try to fold the immediate value defined by \p DefMI into \p UseMI, which
/// uses \p Reg. Returns true if \p UseMI was rewritten to take the immediate
/// directly, false if nothing was changed.
bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
MachineRegisterInfo *MRI) const override;
/// } Optimization
Register getGlobalBaseReg(MachineFunction *MF) const;
// Lower pseudo instructions after register allocation.

View File

@ -48,7 +48,7 @@ def LO7 : SDNodeXForm<imm, [{
SDLoc(N), MVT::i32);
}]>;
def MIMM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(convMImmVal(getImmVal(N)),
return CurDAG->getTargetConstant(val2MImm(getImmVal(N)),
SDLoc(N), MVT::i32);
}]>;
def LO32 : SDNodeXForm<imm, [{
@ -66,7 +66,7 @@ def LO7FP : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(SignExtend32(Val, 7), SDLoc(N), MVT::i32);
}]>;
def MIMMFP : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(convMImmVal(getFpImmVal(N)),
return CurDAG->getTargetConstant(val2MImm(getFpImmVal(N)),
SDLoc(N), MVT::i32);
}]>;
def LOFP32 : SDNodeXForm<fpimm, [{
@ -515,7 +515,8 @@ multiclass RRbm<string opcStr, bits<8>opc,
RegisterClass RCo, ValueType Tyo,
RegisterClass RCi, ValueType Tyi,
SDPatternOperator OpNode = null_frag,
Operand immOp = simm7, Operand mOp = mimm> {
Operand immOp = simm7, Operand mOp = mimm,
bit MoveImm = 0> {
def rr : RR<opc, (outs RCo:$sx), (ins RCi:$sy, RCi:$sz),
!strconcat(opcStr, " $sx, $sy, $sz"),
[(set Tyo:$sx, (OpNode Tyi:$sy, Tyi:$sz))]>;
@ -532,7 +533,12 @@ multiclass RRbm<string opcStr, bits<8>opc,
let cy = 0, cz = 0 in
def im : RR<opc, (outs RCo:$sx), (ins immOp:$sy, mOp:$sz),
!strconcat(opcStr, " $sx, $sy, $sz"),
[(set Tyo:$sx, (OpNode (Tyi immOp:$sy), (Tyi mOp:$sz)))]>;
[(set Tyo:$sx, (OpNode (Tyi immOp:$sy), (Tyi mOp:$sz)))]> {
// VE uses ORim as a move immediate instruction, so declare it here.
// An instruction declared as MoveImm will be optimized in FoldImmediate
// later.
let isMoveImm = MoveImm;
}
}
// Multiclass for non-commutative RR type instructions
@ -564,8 +570,8 @@ multiclass RRNCbm<string opcStr, bits<8>opc,
multiclass RRm<string opcStr, bits<8>opc,
RegisterClass RC, ValueType Ty,
SDPatternOperator OpNode = null_frag,
Operand immOp = simm7, Operand mOp = mimm> :
RRbm<opcStr, opc, RC, Ty, RC, Ty, OpNode, immOp, mOp>;
Operand immOp = simm7, Operand mOp = mimm, bit MoveImm = 0> :
RRbm<opcStr, opc, RC, Ty, RC, Ty, OpNode, immOp, mOp, MoveImm>;
// Generic RR multiclass for non-commutative instructions with 2 arguments.
// e.g. SUBUL, SUBUW, SUBSWSX, and etc.
@ -923,7 +929,7 @@ multiclass SHMm<string opcStr, bits<8> opc, RegisterClass RC> {
//-----------------------------------------------------------------------------
// Multiclass for generic RM instructions
multiclass RMm<string opcStr, bits<8>opc, RegisterClass RC> {
multiclass RMm<string opcStr, bits<8>opc, RegisterClass RC, bit MoveImm = 0> {
def rri : RM<opc, (outs RC:$dest), (ins MEMrri:$addr),
!strconcat(opcStr, " $dest, $addr"), []>;
let cy = 0 in
@ -934,14 +940,19 @@ multiclass RMm<string opcStr, bits<8>opc, RegisterClass RC> {
!strconcat(opcStr, " $dest, $addr"), []>;
let cy = 0, cz = 0 in
def zii : RM<opc, (outs RC:$dest), (ins MEMzii:$addr),
!strconcat(opcStr, " $dest, $addr"), []>;
!strconcat(opcStr, " $dest, $addr"), []> {
// VE uses LEAzii and LEASLzii as a move immediate instruction, so declare
// it here. An instruction declared as MoveImm will be optimized in
// FoldImmediate later.
let isMoveImm = MoveImm;
}
}
// Section 8.2.1 - LEA
let isReMaterializable = 1, isAsCheapAsAMove = 1,
DecoderMethod = "DecodeLoadI64" in {
let cx = 0 in defm LEA : RMm<"lea", 0x06, I64>;
let cx = 1 in defm LEASL : RMm<"lea.sl", 0x06, I64>;
let cx = 0 in defm LEA : RMm<"lea", 0x06, I64, /* MoveImm */ 1>;
let cx = 1 in defm LEASL : RMm<"lea.sl", 0x06, I64, /* MoveImm */ 1>;
}
// LEA basic patterns.
@ -1218,7 +1229,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm AND : RRm<"and", 0x44, I64, i64, and>;
// Section 8.5.2 - OR (OR)
defm OR : RRm<"or", 0x45, I64, i64, or>;
defm OR : RRm<"or", 0x45, I64, i64, or, simm7, mimm, /* MoveImm */ 1>;
// Section 8.5.3 - XOR (Exclusive OR)
defm XOR : RRm<"xor", 0x46, I64, i64, xor>;

View File

@ -171,8 +171,8 @@ define i128 @_Z24atomic_load_relaxed_i128RNSt3__16atomicInEE(%"struct.std::__1::
; CHECK-NEXT: lea %s0, __atomic_load@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, __atomic_load@hi(, %s0)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: lea %s2, -16(, %s9)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: or %s3, 0, (0)1
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: ld %s1, -8(, %s9)
@ -196,8 +196,8 @@ define i128 @_Z24atomic_load_relaxed_u128RNSt3__16atomicIoEE(%"struct.std::__1::
; CHECK-NEXT: lea %s0, __atomic_load@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, __atomic_load@hi(, %s0)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: lea %s2, -16(, %s9)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: or %s3, 0, (0)1
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: ld %s1, -8(, %s9)
@ -332,8 +332,8 @@ define i128 @_Z24atomic_load_acquire_i128RNSt3__16atomicInEE(%"struct.std::__1::
; CHECK-NEXT: lea %s0, __atomic_load@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, __atomic_load@hi(, %s0)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: lea %s2, -16(, %s9)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: or %s3, 2, (0)1
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: ld %s1, -8(, %s9)
@ -357,8 +357,8 @@ define i128 @_Z24atomic_load_acquire_u128RNSt3__16atomicIoEE(%"struct.std::__1::
; CHECK-NEXT: lea %s0, __atomic_load@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, __atomic_load@hi(, %s0)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: lea %s2, -16(, %s9)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: or %s3, 2, (0)1
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: ld %s1, -8(, %s9)
@ -493,8 +493,8 @@ define i128 @_Z24atomic_load_seq_cst_i128RNSt3__16atomicInEE(%"struct.std::__1::
; CHECK-NEXT: lea %s0, __atomic_load@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, __atomic_load@hi(, %s0)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: lea %s2, -16(, %s9)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: or %s3, 5, (0)1
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: ld %s1, -8(, %s9)
@ -518,8 +518,8 @@ define i128 @_Z24atomic_load_seq_cst_u128RNSt3__16atomicIoEE(%"struct.std::__1::
; CHECK-NEXT: lea %s0, __atomic_load@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, __atomic_load@hi(, %s0)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: lea %s2, -16(, %s9)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: or %s3, 5, (0)1
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: ld %s1, -8(, %s9)

View File

@ -171,8 +171,8 @@ define void @_Z25atomic_store_relaxed_i128RNSt3__16atomicInEEn(%"struct.std::__1
; CHECK-NEXT: lea %s0, __atomic_store@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, __atomic_store@hi(, %s0)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: lea %s2, -16(, %s9)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: or %s3, 0, (0)1
; CHECK-NEXT: or %s1, 0, %s4
; CHECK-NEXT: bsic %s10, (, %s12)
@ -197,8 +197,8 @@ define void @_Z25atomic_store_relaxed_u128RNSt3__16atomicIoEEo(%"struct.std::__1
; CHECK-NEXT: lea %s0, __atomic_store@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, __atomic_store@hi(, %s0)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: lea %s2, -16(, %s9)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: or %s3, 0, (0)1
; CHECK-NEXT: or %s1, 0, %s4
; CHECK-NEXT: bsic %s10, (, %s12)
@ -332,8 +332,8 @@ define void @_Z25atomic_store_release_i128RNSt3__16atomicInEEn(%"struct.std::__1
; CHECK-NEXT: lea %s0, __atomic_store@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, __atomic_store@hi(, %s0)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: lea %s2, -16(, %s9)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: or %s3, 3, (0)1
; CHECK-NEXT: or %s1, 0, %s4
; CHECK-NEXT: bsic %s10, (, %s12)
@ -358,8 +358,8 @@ define void @_Z25atomic_store_release_u128RNSt3__16atomicIoEEo(%"struct.std::__1
; CHECK-NEXT: lea %s0, __atomic_store@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, __atomic_store@hi(, %s0)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: lea %s2, -16(, %s9)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: or %s3, 3, (0)1
; CHECK-NEXT: or %s1, 0, %s4
; CHECK-NEXT: bsic %s10, (, %s12)
@ -502,8 +502,8 @@ define void @_Z25atomic_store_seq_cst_i128RNSt3__16atomicInEEn(%"struct.std::__1
; CHECK-NEXT: lea %s0, __atomic_store@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, __atomic_store@hi(, %s0)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: lea %s2, -16(, %s9)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: or %s3, 5, (0)1
; CHECK-NEXT: or %s1, 0, %s4
; CHECK-NEXT: bsic %s10, (, %s12)
@ -528,8 +528,8 @@ define void @_Z25atomic_store_seq_cst_u128RNSt3__16atomicIoEEo(%"struct.std::__1
; CHECK-NEXT: lea %s0, __atomic_store@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, __atomic_store@hi(, %s0)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: lea %s2, -16(, %s9)
; CHECK-NEXT: or %s0, 16, (0)1
; CHECK-NEXT: or %s3, 5, (0)1
; CHECK-NEXT: or %s1, 0, %s4
; CHECK-NEXT: bsic %s10, (, %s12)

View File

@ -522,11 +522,10 @@ define void @br_cc_i128_imm(i128 %0) {
; CHECK-LABEL: br_cc_i128_imm:
; CHECK: .LBB{{[0-9]+}}_4:
; CHECK-NEXT: or %s2, 0, (0)1
; CHECK-NEXT: cmps.l %s1, %s1, %s2
; CHECK-NEXT: cmps.l %s1, %s1, (0)1
; CHECK-NEXT: or %s3, 0, (0)1
; CHECK-NEXT: cmov.l.gt %s3, (63)0, %s1
; CHECK-NEXT: or %s4, 63, (0)1
; CHECK-NEXT: cmpu.l %s0, %s0, %s4
; CHECK-NEXT: cmpu.l %s0, %s0, (58)0
; CHECK-NEXT: cmov.l.gt %s2, (63)0, %s0
; CHECK-NEXT: cmov.l.eq %s3, %s2, %s1
; CHECK-NEXT: brne.w 0, %s3, .LBB{{[0-9]+}}_2
@ -552,11 +551,10 @@ define void @br_cc_u128_imm(i128 %0) {
; CHECK-LABEL: br_cc_u128_imm:
; CHECK: .LBB{{[0-9]+}}_4:
; CHECK-NEXT: or %s2, 0, (0)1
; CHECK-NEXT: cmps.l %s1, %s1, %s2
; CHECK-NEXT: cmps.l %s1, %s1, (0)1
; CHECK-NEXT: or %s3, 0, (0)1
; CHECK-NEXT: cmov.l.ne %s3, (63)0, %s1
; CHECK-NEXT: or %s4, 63, (0)1
; CHECK-NEXT: cmpu.l %s0, %s0, %s4
; CHECK-NEXT: cmpu.l %s0, %s0, (58)0
; CHECK-NEXT: cmov.l.gt %s2, (63)0, %s0
; CHECK-NEXT: cmov.l.eq %s3, %s2, %s1
; CHECK-NEXT: brne.w 0, %s3, .LBB{{[0-9]+}}_2
@ -855,13 +853,11 @@ define void @br_cc_imm_u64(i64 %0) {
define void @br_cc_imm_i128(i128 %0) {
; CHECK-LABEL: br_cc_imm_i128:
; CHECK: .LBB{{[0-9]+}}_4:
; CHECK-NEXT: or %s2, -1, (0)1
; CHECK-NEXT: cmps.l %s1, %s1, %s2
; CHECK-NEXT: cmps.l %s1, %s1, (0)0
; CHECK-NEXT: or %s2, 0, (0)1
; CHECK-NEXT: or %s3, 0, (0)1
; CHECK-NEXT: cmov.l.lt %s3, (63)0, %s1
; CHECK-NEXT: or %s4, -64, (0)1
; CHECK-NEXT: cmpu.l %s0, %s0, %s4
; CHECK-NEXT: cmpu.l %s0, %s0, (58)1
; CHECK-NEXT: cmov.l.lt %s2, (63)0, %s0
; CHECK-NEXT: cmov.l.eq %s3, %s2, %s1
; CHECK-NEXT: brne.w 0, %s3, .LBB{{[0-9]+}}_2
@ -886,13 +882,11 @@ define void @br_cc_imm_i128(i128 %0) {
define void @br_cc_imm_u128(i128 %0) {
; CHECK-LABEL: br_cc_imm_u128:
; CHECK: .LBB{{[0-9]+}}_4:
; CHECK-NEXT: or %s2, -1, (0)1
; CHECK-NEXT: cmps.l %s1, %s1, %s2
; CHECK-NEXT: cmps.l %s1, %s1, (0)0
; CHECK-NEXT: or %s2, 0, (0)1
; CHECK-NEXT: or %s3, 0, (0)1
; CHECK-NEXT: cmov.l.ne %s3, (63)0, %s1
; CHECK-NEXT: or %s4, -64, (0)1
; CHECK-NEXT: cmpu.l %s0, %s0, %s4
; CHECK-NEXT: cmpu.l %s0, %s0, (58)1
; CHECK-NEXT: cmov.l.lt %s2, (63)0, %s0
; CHECK-NEXT: cmov.l.eq %s3, %s2, %s1
; CHECK-NEXT: brne.w 0, %s3, .LBB{{[0-9]+}}_2

View File

@ -553,8 +553,7 @@ define i64 @ull2ull(i64 returned %0) {
define float @ull2f(i64 %x) {
; CHECK-LABEL: ull2f:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s2, %s0, %s1
; CHECK-NEXT: cmps.l %s2, %s0, (0)1
; CHECK-NEXT: cvt.d.l %s1, %s0
; CHECK-NEXT: cvt.s.d %s1, %s1
; CHECK-NEXT: srl %s3, %s0, 1

View File

@ -9,8 +9,7 @@ declare i8 @llvm.ctlz.i8(i8, i1)
define i128 @func128(i128 %p){
; CHECK-LABEL: func128:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: or %s2, 0, (0)1
; CHECK-NEXT: cmps.l %s2, %s1, %s2
; CHECK-NEXT: cmps.l %s2, %s1, (0)1
; CHECK-NEXT: ldz %s1, %s1
; CHECK-NEXT: ldz %s0, %s0
; CHECK-NEXT: lea %s0, 64(, %s0)
@ -179,8 +178,7 @@ define zeroext i8 @func8iz() {
define i128 @func128x(i128 %p){
; CHECK-LABEL: func128x:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: or %s2, 0, (0)1
; CHECK-NEXT: cmps.l %s2, %s1, %s2
; CHECK-NEXT: cmps.l %s2, %s1, (0)1
; CHECK-NEXT: ldz %s1, %s1
; CHECK-NEXT: ldz %s0, %s0
; CHECK-NEXT: lea %s0, 64(, %s0)

View File

@ -9,8 +9,7 @@ declare i8 @llvm.cttz.i8(i8, i1)
define i128 @func128(i128 %p) {
; CHECK-LABEL: func128:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: or %s2, 0, (0)1
; CHECK-NEXT: cmps.l %s2, %s0, %s2
; CHECK-NEXT: cmps.l %s2, %s0, (0)1
; CHECK-NEXT: lea %s3, -1(, %s0)
; CHECK-NEXT: nnd %s0, %s0, %s3
; CHECK-NEXT: pcnt %s3, %s0

View File

@ -0,0 +1,221 @@
# RUN: llc -mtriple=ve -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck %s
---
## Ensure fold immediate as simm7 at rhs
#CHECK-LABEL: name: addsl_ri
#CHECK: %2:i64 = nsw ADDSLri %0, 5
name: addsl_ri
alignment: 16
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers:
- { id: 0, class: i64, preferred-register: '' }
- { id: 1, class: i64, preferred-register: '' }
liveins:
- { reg: '$sx0', virtual-reg: '%0' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 8
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo: {}
body: |
bb.0:
liveins: $sx0
%0:i64 = COPY $sx0
%1:i64 = ORim 5, 0
%2:i64 = nsw ADDSLrr %0, %1
$sx0 = COPY %2
RET implicit $sx10, implicit $sx0
...
---
## Ensure fold immediate as mimm at rhs
#CHECK-LABEL: name: addsl_rm
#CHECK: %2:i64 = nsw ADDSLrm %0, 120
name: addsl_rm
alignment: 16
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers:
- { id: 0, class: i64, preferred-register: '' }
- { id: 1, class: i64, preferred-register: '' }
liveins:
- { reg: '$sx0', virtual-reg: '%0' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 8
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo: {}
body: |
bb.0:
liveins: $sx0
%0:i64 = COPY $sx0
%1:i64 = ORim 0, 120
%2:i64 = nsw ADDSLrr %0, %1
$sx0 = COPY %2
RET implicit $sx10, implicit $sx0
...
---
## Ensure fold immediate as simm7 at lhs
#CHECK-LABEL: name: addsl_ri_com
#CHECK: %2:i64 = nsw ADDSLri %0, 5
name: addsl_ri_com
alignment: 16
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers:
- { id: 0, class: i64, preferred-register: '' }
- { id: 1, class: i64, preferred-register: '' }
liveins:
- { reg: '$sx0', virtual-reg: '%0' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 8
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo: {}
body: |
bb.0:
liveins: $sx0
%0:i64 = COPY $sx0
%1:i64 = ORim 5, 0
%2:i64 = nsw ADDSLrr %1, %0
$sx0 = COPY %2
RET implicit $sx10, implicit $sx0
...
---
## Ensure fold immediate as mimm at lhs
#CHECK-LABEL: name: addsl_rm_com
#CHECK: %2:i64 = nsw ADDSLrm %0, 120
name: addsl_rm_com
alignment: 16
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers:
- { id: 0, class: i64, preferred-register: '' }
- { id: 1, class: i64, preferred-register: '' }
liveins:
- { reg: '$sx0', virtual-reg: '%0' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 8
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo: {}
body: |
bb.0:
liveins: $sx0
%0:i64 = COPY $sx0
%1:i64 = ORim 0, 120
%2:i64 = nsw ADDSLrr %1, %0
$sx0 = COPY %2
RET implicit $sx10, implicit $sx0
...

View File

@ -0,0 +1,111 @@
# RUN: llc -mtriple=ve -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck %s
---
## Ensure fold immediate as simm7 at lhs
#CHECK-LABEL: name: cmpsl_ir
#CHECK: %2:i64 = nsw CMPSLir 5, %0
name: cmpsl_ir
alignment: 16
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers:
- { id: 0, class: i64, preferred-register: '' }
- { id: 1, class: i64, preferred-register: '' }
liveins:
- { reg: '$sx0', virtual-reg: '%0' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 8
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo: {}
body: |
bb.0:
liveins: $sx0
%0:i64 = COPY $sx0
%1:i64 = ORim 5, 0
%2:i64 = nsw CMPSLrr %1, %0
$sx0 = COPY %2
RET implicit $sx10, implicit $sx0
...
---
## Ensure fold immediate as mimm at rhs
#CHECK-LABEL: name: cmpsl_rm
#CHECK: %2:i64 = nsw CMPSLrm %0, 120
name: cmpsl_rm
alignment: 16
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers:
- { id: 0, class: i64, preferred-register: '' }
- { id: 1, class: i64, preferred-register: '' }
liveins:
- { reg: '$sx0', virtual-reg: '%0' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 8
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo: {}
body: |
bb.0:
liveins: $sx0
%0:i64 = COPY $sx0
%1:i64 = ORim 0, 120
%2:i64 = nsw CMPSLrr %0, %1
$sx0 = COPY %2
RET implicit $sx10, implicit $sx0
...

View File

@ -3,7 +3,7 @@
; Function Attrs: norecurse nounwind readnone
define float @c2f(i8 signext %a) {
; CHECK-LABEL: c2f:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK: .LBB{{[0-9]+}}_2: # %entry
; CHECK-NEXT: cvt.s.w %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
entry:
@ -14,7 +14,7 @@ entry:
; Function Attrs: norecurse nounwind readnone
define float @s2f(i16 signext %a) {
; CHECK-LABEL: s2f:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK: .LBB{{[0-9]+}}_2: # %entry
; CHECK-NEXT: cvt.s.w %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
entry:
@ -25,7 +25,7 @@ entry:
; Function Attrs: norecurse nounwind readnone
define float @i2f(i32 %a) {
; CHECK-LABEL: i2f:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK: .LBB{{[0-9]+}}_2: # %entry
; CHECK-NEXT: cvt.s.w %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
entry:
@ -36,7 +36,7 @@ entry:
; Function Attrs: norecurse nounwind readnone
define float @l2f(i64 %a) {
; CHECK-LABEL: l2f:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK: .LBB{{[0-9]+}}_2: # %entry
; CHECK-NEXT: cvt.d.l %s0, %s0
; CHECK-NEXT: cvt.s.d %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
@ -48,7 +48,7 @@ entry:
; Function Attrs: norecurse nounwind readnone
define float @uc2f(i8 zeroext %a) {
; CHECK-LABEL: uc2f:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK: .LBB{{[0-9]+}}_2: # %entry
; CHECK-NEXT: cvt.s.w %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
entry:
@ -59,7 +59,7 @@ entry:
; Function Attrs: norecurse nounwind readnone
define float @us2f(i16 zeroext %a) {
; CHECK-LABEL: us2f:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK: .LBB{{[0-9]+}}_2: # %entry
; CHECK-NEXT: cvt.s.w %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
entry:
@ -70,7 +70,7 @@ entry:
; Function Attrs: norecurse nounwind readnone
define float @ui2f(i32 %a) {
; CHECK-LABEL: ui2f:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK: .LBB{{[0-9]+}}_2: # %entry
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: cvt.d.l %s0, %s0
; CHECK-NEXT: cvt.s.d %s0, %s0
@ -83,9 +83,8 @@ entry:
; Function Attrs: norecurse nounwind readnone
define float @ul2f(i64 %a) {
; CHECK-LABEL: ul2f:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s2, %s0, %s1
; CHECK: .LBB{{[0-9]+}}_2: # %entry
; CHECK-NEXT: cmps.l %s2, %s0, (0)1
; CHECK-NEXT: cvt.d.l %s1, %s0
; CHECK-NEXT: cvt.s.d %s1, %s1
; CHECK-NEXT: srl %s3, %s0, 1
@ -105,7 +104,7 @@ entry:
; Function Attrs: norecurse nounwind readnone
define double @c2d(i8 signext %a) {
; CHECK-LABEL: c2d:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK: .LBB{{[0-9]+}}_2: # %entry
; CHECK-NEXT: cvt.d.w %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
entry:
@ -116,7 +115,7 @@ entry:
; Function Attrs: norecurse nounwind readnone
define double @s2d(i16 signext %a) {
; CHECK-LABEL: s2d:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK: .LBB{{[0-9]+}}_2: # %entry
; CHECK-NEXT: cvt.d.w %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
entry:
@ -127,7 +126,7 @@ entry:
; Function Attrs: norecurse nounwind readnone
define double @i2d(i32 %a) {
; CHECK-LABEL: i2d:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK: .LBB{{[0-9]+}}_2: # %entry
; CHECK-NEXT: cvt.d.w %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
entry:
@ -138,7 +137,7 @@ entry:
; Function Attrs: norecurse nounwind readnone
define double @l2d(i64 %a) {
; CHECK-LABEL: l2d:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK: .LBB{{[0-9]+}}_2: # %entry
; CHECK-NEXT: cvt.d.l %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
entry:
@ -149,7 +148,7 @@ entry:
; Function Attrs: norecurse nounwind readnone
define double @uc2d(i8 zeroext %a) {
; CHECK-LABEL: uc2d:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK: .LBB{{[0-9]+}}_2: # %entry
; CHECK-NEXT: cvt.d.w %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
entry:
@ -160,7 +159,7 @@ entry:
; Function Attrs: norecurse nounwind readnone
define double @us2d(i16 zeroext %a) {
; CHECK-LABEL: us2d:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK: .LBB{{[0-9]+}}_2: # %entry
; CHECK-NEXT: cvt.d.w %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
entry:
@ -171,7 +170,7 @@ entry:
; Function Attrs: norecurse nounwind readnone
define double @ui2d(i32 %a) {
; CHECK-LABEL: ui2d:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK: .LBB{{[0-9]+}}_2: # %entry
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: cvt.d.l %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
@ -183,7 +182,7 @@ entry:
; Function Attrs: norecurse nounwind readnone
define double @ul2d(i64 %a) {
; CHECK-LABEL: ul2d:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK: .LBB{{[0-9]+}}_2: # %entry
; CHECK-NEXT: srl %s1, %s0, 32
; CHECK-NEXT: lea.sl %s2, 1160773632
; CHECK-NEXT: or %s1, %s1, %s2

View File

@ -129,8 +129,7 @@ define zeroext i1 @select_cc_i128_i1(i128 %0, i128 %1, i1 zeroext %2, i1 zeroext
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: adds.w.zx %s0, %s5, (0)1
; CHECK-NEXT: or %s11, 0, %s9
@ -146,8 +145,7 @@ define zeroext i1 @select_cc_u128_i1(i128 %0, i128 %1, i1 zeroext %2, i1 zeroext
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: adds.w.zx %s0, %s5, (0)1
; CHECK-NEXT: or %s11, 0, %s9
@ -319,8 +317,7 @@ define signext i8 @select_cc_i128_i8(i128 %0, i128 %1, i8 signext %2, i8 signext
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: adds.w.sx %s0, %s5, (0)1
; CHECK-NEXT: or %s11, 0, %s9
@ -336,8 +333,7 @@ define signext i8 @select_cc_u128_i8(i128 %0, i128 %1, i8 signext %2, i8 signext
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: adds.w.sx %s0, %s5, (0)1
; CHECK-NEXT: or %s11, 0, %s9
@ -509,8 +505,7 @@ define zeroext i8 @select_cc_i128_u8(i128 %0, i128 %1, i8 zeroext %2, i8 zeroext
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: adds.w.zx %s0, %s5, (0)1
; CHECK-NEXT: or %s11, 0, %s9
@ -526,8 +521,7 @@ define zeroext i8 @select_cc_u128_u8(i128 %0, i128 %1, i8 zeroext %2, i8 zeroext
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: adds.w.zx %s0, %s5, (0)1
; CHECK-NEXT: or %s11, 0, %s9
@ -699,8 +693,7 @@ define signext i16 @select_cc_i128_i16(i128 %0, i128 %1, i16 signext %2, i16 sig
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: adds.w.sx %s0, %s5, (0)1
; CHECK-NEXT: or %s11, 0, %s9
@ -716,8 +709,7 @@ define signext i16 @select_cc_u128_i16(i128 %0, i128 %1, i16 signext %2, i16 sig
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: adds.w.sx %s0, %s5, (0)1
; CHECK-NEXT: or %s11, 0, %s9
@ -889,8 +881,7 @@ define zeroext i16 @select_cc_i128_u16(i128 %0, i128 %1, i16 zeroext %2, i16 zer
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: adds.w.zx %s0, %s5, (0)1
; CHECK-NEXT: or %s11, 0, %s9
@ -906,8 +897,7 @@ define zeroext i16 @select_cc_u128_u16(i128 %0, i128 %1, i16 zeroext %2, i16 zer
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: adds.w.zx %s0, %s5, (0)1
; CHECK-NEXT: or %s11, 0, %s9
@ -1079,8 +1069,7 @@ define signext i32 @select_cc_i128_i32(i128 %0, i128 %1, i32 signext %2, i32 sig
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: adds.w.sx %s0, %s5, (0)1
; CHECK-NEXT: or %s11, 0, %s9
@ -1096,8 +1085,7 @@ define signext i32 @select_cc_u128_i32(i128 %0, i128 %1, i32 signext %2, i32 sig
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: adds.w.sx %s0, %s5, (0)1
; CHECK-NEXT: or %s11, 0, %s9
@ -1269,8 +1257,7 @@ define zeroext i32 @select_cc_i128_u32(i128 %0, i128 %1, i32 zeroext %2, i32 zer
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: adds.w.zx %s0, %s5, (0)1
; CHECK-NEXT: or %s11, 0, %s9
@ -1286,8 +1273,7 @@ define zeroext i32 @select_cc_u128_u32(i128 %0, i128 %1, i32 zeroext %2, i32 zer
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: adds.w.zx %s0, %s5, (0)1
; CHECK-NEXT: or %s11, 0, %s9
@ -1460,8 +1446,7 @@ define i64 @select_cc_i128_i64(i128 %0, i128 %1, i64 %2, i64 %3) {
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: or %s0, 0, %s5
; CHECK-NEXT: or %s11, 0, %s9
@ -1477,8 +1462,7 @@ define i64 @select_cc_u128_i64(i128 %0, i128 %1, i64 %2, i64 %3) {
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: or %s0, 0, %s5
; CHECK-NEXT: or %s11, 0, %s9
@ -1651,8 +1635,7 @@ define i64 @select_cc_i128_u64(i128 %0, i128 %1, i64 %2, i64 %3) {
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: or %s0, 0, %s5
; CHECK-NEXT: or %s11, 0, %s9
@ -1668,8 +1651,7 @@ define i64 @select_cc_u128_u64(i128 %0, i128 %1, i64 %2, i64 %3) {
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: or %s0, 0, %s5
; CHECK-NEXT: or %s11, 0, %s9
@ -1860,8 +1842,7 @@ define i128 @select_cc_i128_i128(i128 %0, i128 %1, i128 %2, i128 %3) {
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s6, %s4, %s0
; CHECK-NEXT: cmov.l.eq %s7, %s5, %s0
; CHECK-NEXT: or %s0, 0, %s6
@ -1879,8 +1860,7 @@ define i128 @select_cc_u128_i128(i128 %0, i128 %1, i128 %2, i128 %3) {
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s6, %s4, %s0
; CHECK-NEXT: cmov.l.eq %s7, %s5, %s0
; CHECK-NEXT: or %s0, 0, %s6
@ -2079,8 +2059,7 @@ define i128 @select_cc_i128_u128(i128 %0, i128 %1, i128 %2, i128 %3) {
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s6, %s4, %s0
; CHECK-NEXT: cmov.l.eq %s7, %s5, %s0
; CHECK-NEXT: or %s0, 0, %s6
@ -2098,8 +2077,7 @@ define i128 @select_cc_u128_u128(i128 %0, i128 %1, i128 %2, i128 %3) {
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s6, %s4, %s0
; CHECK-NEXT: cmov.l.eq %s7, %s5, %s0
; CHECK-NEXT: or %s0, 0, %s6
@ -2280,8 +2258,7 @@ define float @select_cc_i128_float(i128 %0, i128 %1, float %2, float %3) {
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: or %s0, 0, %s5
; CHECK-NEXT: or %s11, 0, %s9
@ -2297,8 +2274,7 @@ define float @select_cc_u128_float(i128 %0, i128 %1, float %2, float %3) {
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: or %s0, 0, %s5
; CHECK-NEXT: or %s11, 0, %s9
@ -2471,8 +2447,7 @@ define double @select_cc_i128_double(i128 %0, i128 %1, double %2, double %3) {
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: or %s0, 0, %s5
; CHECK-NEXT: or %s11, 0, %s9
@ -2488,8 +2463,7 @@ define double @select_cc_u128_double(i128 %0, i128 %1, double %2, double %3) {
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s5, %s4, %s0
; CHECK-NEXT: or %s0, 0, %s5
; CHECK-NEXT: or %s11, 0, %s9
@ -2680,8 +2654,7 @@ define fp128 @select_cc_i128_quad(i128 %0, i128 %1, fp128 %2, fp128 %3) {
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s6, %s4, %s0
; CHECK-NEXT: cmov.l.eq %s7, %s5, %s0
; CHECK-NEXT: or %s0, 0, %s6
@ -2699,8 +2672,7 @@ define fp128 @select_cc_u128_quad(i128 %0, i128 %1, fp128 %2, fp128 %3) {
; CHECK-NEXT: xor %s1, %s1, %s3
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s6, %s4, %s0
; CHECK-NEXT: cmov.l.eq %s7, %s5, %s0
; CHECK-NEXT: or %s0, 0, %s6

View File

@ -129,7 +129,7 @@ define zeroext i1 @setcc_i128(i128 %0, i128 %1) {
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0
; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
; CHECK-NEXT: or %s11, 0, %s9
@ -145,7 +145,7 @@ define zeroext i1 @setcc_u128(i128 %0, i128 %1) {
; CHECK-NEXT: xor %s0, %s0, %s2
; CHECK-NEXT: or %s0, %s0, %s1
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: cmps.l %s0, %s0, %s1
; CHECK-NEXT: cmps.l %s0, %s0, (0)1
; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0
; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
; CHECK-NEXT: or %s11, 0, %s9