[FastISel][AArch64] Add support for variable shift.

This adds the missing variable shift support for the value types i8, i16, and i32.

This fixes <rdar://problem/18095685>.

llvm-svn: 216242
This commit is contained in:
Juergen Ributzka 2014-08-21 23:06:07 +00:00
parent 2c52c66816
commit 0e0b4c1cda
2 changed files with 253 additions and 44 deletions

View File

@ -128,7 +128,7 @@ private:
bool SelectTrunc(const Instruction *I);
bool SelectIntExt(const Instruction *I);
bool SelectMul(const Instruction *I);
bool SelectShift(const Instruction *I, bool IsLeftShift, bool IsArithmetic);
bool SelectShift(const Instruction *I);
bool SelectBitCast(const Instruction *I);
// Utility helper routines.
@ -193,9 +193,15 @@ private:
unsigned Op1, bool Op1IsKill);
unsigned Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill);
unsigned Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
unsigned Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
unsigned Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
unsigned Op1Reg, bool Op1IsKill);
unsigned emitLSL_ri(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, uint64_t Imm);
unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
unsigned Op1Reg, bool Op1IsKill);
unsigned emitLSR_ri(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, uint64_t Imm);
unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
unsigned Op1Reg, bool Op1IsKill);
unsigned emitASR_ri(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, uint64_t Imm);
unsigned AArch64MaterializeInt(const ConstantInt *CI, MVT VT);
unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
@ -703,8 +709,8 @@ bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT) {
Addr.getOffsetReg(), /*TODO:IsKill=*/false,
Addr.getShift());
else
ResultReg = Emit_LSL_ri(MVT::i64, Addr.getOffsetReg(),
/*Op0IsKill=*/false, Addr.getShift());
ResultReg = emitLSL_ri(MVT::i64, Addr.getOffsetReg(), /*Op0IsKill=*/false,
Addr.getShift());
if (!ResultReg)
return false;
@ -2366,7 +2372,7 @@ bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
if (VT == MVT::i32) {
MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
unsigned ShiftReg = Emit_LSR_ri(MVT::i64, MulReg, false, 32);
unsigned ShiftReg = emitLSR_ri(MVT::i64, MulReg, /*IsKill=*/false, 32);
MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
AArch64::sub_32);
ShiftReg = FastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
@ -2653,8 +2659,34 @@ unsigned AArch64FastISel::Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
AArch64::XZR, /*IsKill=*/true);
}
unsigned AArch64FastISel::Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
uint64_t Shift) {
/// Emit a variable (register-register) logical shift left of \p Op0Reg by
/// \p Op1Reg for the result type \p RetVT.
///
/// i8 and i16 are shifted with the 32-bit LSLVWr opcode: the shift amount is
/// first masked to the value width, and the shifted result is masked again so
/// that only the low 8/16 bits remain. i32 uses LSLVWr and i64 uses LSLVXr
/// without any masking.
///
/// \returns the register holding the result, or 0 if \p RetVT is not one of
/// i8, i16, i32, or i64.
unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                                     unsigned Op1Reg, bool Op1IsKill) {
  // A non-zero mask marks the narrow types that need truncation.
  uint64_t TruncMask = 0;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i8:
    TruncMask = 0xff;
    break;
  case MVT::i16:
    TruncMask = 0xffff;
    break;
  case MVT::i32:
  case MVT::i64:
    break;
  }

  const bool Is64Bit = (RetVT == MVT::i64);
  const unsigned ShiftOpc = Is64Bit ? AArch64::LSLVXr : AArch64::LSLVWr;
  const TargetRegisterClass *RegClass =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Full-width types: a single shift instruction is all that is needed.
  if (TruncMask == 0)
    return FastEmitInst_rr(ShiftOpc, RegClass, Op0Reg, Op0IsKill, Op1Reg,
                           Op1IsKill);

  // Narrow types: mask the shift amount, shift, then mask the result. The
  // masked amount is a fresh temporary, so it is killed by the shift.
  unsigned AmountReg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, TruncMask);
  unsigned ShiftedReg = FastEmitInst_rr(ShiftOpc, RegClass, Op0Reg, Op0IsKill,
                                        AmountReg, /*Op1IsKill=*/true);
  return emitAND_ri(MVT::i32, ShiftedReg, /*IsKill=*/true, TruncMask);
}
unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
uint64_t Shift) {
unsigned Opc, ImmR, ImmS;
switch (RetVT.SimpleTy) {
default: return 0;
@ -2673,8 +2705,35 @@ unsigned AArch64FastISel::Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
return FastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
}
unsigned AArch64FastISel::Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
uint64_t Shift) {
/// Emit a variable (register-register) logical shift right of \p Op0Reg by
/// \p Op1Reg for the result type \p RetVT.
///
/// i8 and i16 are shifted with the 32-bit LSRVWr opcode: both the value and
/// the shift amount are masked to the value width before the shift, and the
/// result is masked once more afterwards. i32 uses LSRVWr and i64 uses LSRVXr
/// without any masking.
///
/// \returns the register holding the result, or 0 if \p RetVT is not one of
/// i8, i16, i32, or i64.
unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                                     unsigned Op1Reg, bool Op1IsKill) {
  // A non-zero mask marks the narrow types that need truncation.
  uint64_t TruncMask = 0;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i8:
    TruncMask = 0xff;
    break;
  case MVT::i16:
    TruncMask = 0xffff;
    break;
  case MVT::i32:
  case MVT::i64:
    break;
  }

  const bool Is64Bit = (RetVT == MVT::i64);
  const unsigned ShiftOpc = Is64Bit ? AArch64::LSRVXr : AArch64::LSRVWr;
  const TargetRegisterClass *RegClass =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Full-width types: a single shift instruction is all that is needed.
  if (TruncMask == 0)
    return FastEmitInst_rr(ShiftOpc, RegClass, Op0Reg, Op0IsKill, Op1Reg,
                           Op1IsKill);

  // Narrow types: mask the value first, then the amount (this order fixes the
  // order of the emitted instructions). Both masked registers are fresh
  // temporaries, so the shift kills them.
  unsigned ValueReg = emitAND_ri(MVT::i32, Op0Reg, Op0IsKill, TruncMask);
  unsigned AmountReg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, TruncMask);
  unsigned ShiftedReg = FastEmitInst_rr(ShiftOpc, RegClass, ValueReg,
                                        /*Op0IsKill=*/true, AmountReg,
                                        /*Op1IsKill=*/true);
  return emitAND_ri(MVT::i32, ShiftedReg, /*IsKill=*/true, TruncMask);
}
unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
uint64_t Shift) {
unsigned Opc, ImmS;
switch (RetVT.SimpleTy) {
default: return 0;
@ -2689,8 +2748,35 @@ unsigned AArch64FastISel::Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
return FastEmitInst_rii(Opc, RC, Op0, Op0IsKill, Shift, ImmS);
}
unsigned AArch64FastISel::Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
uint64_t Shift) {
/// Emit a variable (register-register) arithmetic shift right of \p Op0Reg by
/// \p Op1Reg for the result type \p RetVT.
///
/// i8 and i16 are shifted with the 32-bit ASRVWr opcode: the value is
/// sign-extended from \p RetVT to i32, the shift amount is masked to the value
/// width, and the shifted result is masked afterwards. i32 uses ASRVWr and
/// i64 uses ASRVXr without any extension or masking.
///
/// \returns the register holding the result, or 0 if \p RetVT is not one of
/// i8, i16, i32, or i64.
unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                                     unsigned Op1Reg, bool Op1IsKill) {
  // A non-zero mask marks the narrow types that need truncation.
  uint64_t TruncMask = 0;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i8:
    TruncMask = 0xff;
    break;
  case MVT::i16:
    TruncMask = 0xffff;
    break;
  case MVT::i32:
  case MVT::i64:
    break;
  }

  const bool Is64Bit = (RetVT == MVT::i64);
  const unsigned ShiftOpc = Is64Bit ? AArch64::ASRVXr : AArch64::ASRVWr;
  const TargetRegisterClass *RegClass =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Full-width types: a single shift instruction is all that is needed.
  if (TruncMask == 0)
    return FastEmitInst_rr(ShiftOpc, RegClass, Op0Reg, Op0IsKill, Op1Reg,
                           Op1IsKill);

  // Narrow types: sign-extend the value first, then mask the amount (this
  // order fixes the order of the emitted instructions). Both results are
  // treated as killed by the shift.
  unsigned ValueReg = EmitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
  unsigned AmountReg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, TruncMask);
  unsigned ShiftedReg = FastEmitInst_rr(ShiftOpc, RegClass, ValueReg,
                                        /*Op0IsKill=*/true, AmountReg,
                                        /*Op1IsKill=*/true);
  return emitAND_ri(MVT::i32, ShiftedReg, /*IsKill=*/true, TruncMask);
}
unsigned AArch64FastISel::emitASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
uint64_t Shift) {
unsigned Opc, ImmS;
switch (RetVT.SimpleTy) {
default: return 0;
@ -2892,31 +2978,56 @@ bool AArch64FastISel::SelectMul(const Instruction *I) {
return true;
}
bool AArch64FastISel::SelectShift(const Instruction *I, bool IsLeftShift,
bool IsArithmetic) {
bool AArch64FastISel::SelectShift(const Instruction *I) {
EVT RetEVT = TLI.getValueType(I->getType(), true);
if (!RetEVT.isSimple())
return false;
MVT RetVT = RetEVT.getSimpleVT();
if (!isa<ConstantInt>(I->getOperand(1)))
return false;
unsigned Op0Reg = getRegForValue(I->getOperand(0));
if (!Op0Reg)
return false;
bool Op0IsKill = hasTrivialKill(I->getOperand(0));
uint64_t ShiftVal = cast<ConstantInt>(I->getOperand(1))->getZExtValue();
if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
unsigned ResultReg = 0;
uint64_t ShiftVal = C->getZExtValue();
switch (I->getOpcode()) {
default: llvm_unreachable("Unexpected instruction.");
case Instruction::Shl:
ResultReg = emitLSL_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
break;
case Instruction::AShr:
ResultReg = emitASR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
break;
case Instruction::LShr:
ResultReg = emitLSR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
break;
}
if (!ResultReg)
return false;
unsigned ResultReg;
if (IsLeftShift)
ResultReg = Emit_LSL_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
else {
if (IsArithmetic)
ResultReg = Emit_ASR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
else
ResultReg = Emit_LSR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
UpdateValueMap(I, ResultReg);
return true;
}
unsigned Op1Reg = getRegForValue(I->getOperand(1));
if (!Op1Reg)
return false;
bool Op1IsKill = hasTrivialKill(I->getOperand(1));
unsigned ResultReg = 0;
switch (I->getOpcode()) {
default: llvm_unreachable("Unexpected instruction.");
case Instruction::Shl:
ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
break;
case Instruction::AShr:
ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
break;
case Instruction::LShr:
ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
break;
}
if (!ResultReg)
@ -3012,12 +3123,10 @@ bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) {
// selector -> improve FastISel tblgen.
case Instruction::Mul:
return SelectMul(I);
case Instruction::Shl:
return SelectShift(I, /*IsLeftShift=*/true, /*IsArithmetic=*/false);
case Instruction::LShr:
return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/false);
case Instruction::Shl: // fall-through
case Instruction::LShr: // fall-through
case Instruction::AShr:
return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/true);
return SelectShift(I);
case Instruction::BitCast:
return SelectBitCast(I);
}

View File

@ -1,87 +1,187 @@
; RUN: llc -fast-isel -fast-isel-abort -mtriple=arm64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
; Variable-amount shl on i8: the shift amount in w1 is expected to be masked
; with 0xff, followed by a 32-bit register-register lsl of w0 and a final mask
; of the result with 0xff.
; CHECK-LABEL: lslv_i8
; CHECK: and [[REG1:w[0-9]+]], w1, #0xff
; CHECK-NEXT: lsl [[REG2:w[0-9]+]], w0, [[REG1]]
; CHECK-NEXT: and {{w[0-9]+}}, [[REG2]], #0xff
define zeroext i8 @lslv_i8(i8 %a, i8 %b) {
%1 = shl i8 %a, %b
ret i8 %1
}
; CHECK-LABEL: lsl_i8
; CHECK: ubfiz {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
; CHECK: ubfiz {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
define zeroext i8 @lsl_i8(i8 %a) {
%1 = shl i8 %a, 4
ret i8 %1
}
; Variable-amount shl on i16: the shift amount in w1 is expected to be masked
; with 0xffff, followed by a 32-bit register-register lsl of w0 and a final
; mask of the result with 0xffff.
; CHECK-LABEL: lslv_i16
; CHECK: and [[REG1:w[0-9]+]], w1, #0xffff
; CHECK-NEXT: lsl [[REG2:w[0-9]+]], w0, [[REG1]]
; CHECK-NEXT: and {{w[0-9]+}}, [[REG2]], #0xffff
define zeroext i16 @lslv_i16(i16 %a, i16 %b) {
%1 = shl i16 %a, %b
ret i16 %1
}
; CHECK-LABEL: lsl_i16
; CHECK: ubfiz {{w[0-9]*}}, {{w[0-9]*}}, #8, #8
; CHECK: ubfiz {{w[0-9]*}}, {{w[0-9]*}}, #8, #8
define zeroext i16 @lsl_i16(i16 %a) {
%1 = shl i16 %a, 8
ret i16 %1
}
; Variable-amount shl on i32: expects a register-register lsl of w0 by w1.
; CHECK-LABEL: lslv_i32
; CHECK: lsl {{w[0-9]*}}, w0, w1
define zeroext i32 @lslv_i32(i32 %a, i32 %b) {
%1 = shl i32 %a, %b
ret i32 %1
}
; CHECK-LABEL: lsl_i32
; CHECK: lsl {{w[0-9]*}}, {{w[0-9]*}}, #16
; CHECK: lsl {{w[0-9]*}}, {{w[0-9]*}}, #16
define zeroext i32 @lsl_i32(i32 %a) {
%1 = shl i32 %a, 16
ret i32 %1
}
; Variable-amount shl on i64: expects a register-register lsl of x0 by x1.
; CHECK-LABEL: lslv_i64
; CHECK: lsl {{x[0-9]*}}, x0, x1
define i64 @lslv_i64(i64 %a, i64 %b) {
%1 = shl i64 %a, %b
ret i64 %1
}
; FIXME: This shouldn't use the variable shift version.
; CHECK-LABEL: lsl_i64
; CHECK: lsl {{x[0-9]*}}, {{x[0-9]*}}, {{x[0-9]*}}
; CHECK: lsl {{x[0-9]*}}, {{x[0-9]*}}, {{x[0-9]*}}
define i64 @lsl_i64(i64 %a) {
%1 = shl i64 %a, 32
ret i64 %1
}
; Variable-amount lshr on i8: both the value (w0) and the shift amount (w1)
; are expected to be masked with 0xff, followed by a 32-bit register-register
; lsr and a final mask of the result with 0xff.
; CHECK-LABEL: lsrv_i8
; CHECK: and [[REG1:w[0-9]+]], w0, #0xff
; CHECK-NEXT: and [[REG2:w[0-9]+]], w1, #0xff
; CHECK-NEXT: lsr [[REG3:w[0-9]+]], [[REG1]], [[REG2]]
; CHECK-NEXT: and {{w[0-9]+}}, [[REG3]], #0xff
define zeroext i8 @lsrv_i8(i8 %a, i8 %b) {
%1 = lshr i8 %a, %b
ret i8 %1
}
; CHECK-LABEL: lsr_i8
; CHECK: ubfx {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
; CHECK: ubfx {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
define zeroext i8 @lsr_i8(i8 %a) {
%1 = lshr i8 %a, 4
ret i8 %1
}
; Variable-amount lshr on i16: both the value (w0) and the shift amount (w1)
; are expected to be masked with 0xffff, followed by a 32-bit
; register-register lsr and a final mask of the result with 0xffff.
; CHECK-LABEL: lsrv_i16
; CHECK: and [[REG1:w[0-9]+]], w0, #0xffff
; CHECK-NEXT: and [[REG2:w[0-9]+]], w1, #0xffff
; CHECK-NEXT: lsr [[REG3:w[0-9]+]], [[REG1]], [[REG2]]
; CHECK-NEXT: and {{w[0-9]+}}, [[REG3]], #0xffff
define zeroext i16 @lsrv_i16(i16 %a, i16 %b) {
%1 = lshr i16 %a, %b
ret i16 %1
}
; CHECK-LABEL: lsr_i16
; CHECK: ubfx {{w[0-9]*}}, {{w[0-9]*}}, #8, #8
; CHECK: ubfx {{w[0-9]*}}, {{w[0-9]*}}, #8, #8
define zeroext i16 @lsr_i16(i16 %a) {
%1 = lshr i16 %a, 8
ret i16 %1
}
; Variable-amount lshr on i32: expects a register-register lsr of w0 by w1.
; CHECK-LABEL: lsrv_i32
; CHECK: lsr {{w[0-9]*}}, w0, w1
define zeroext i32 @lsrv_i32(i32 %a, i32 %b) {
%1 = lshr i32 %a, %b
ret i32 %1
}
; CHECK-LABEL: lsr_i32
; CHECK: lsr {{w[0-9]*}}, {{w[0-9]*}}, #16
; CHECK: lsr {{w[0-9]*}}, {{w[0-9]*}}, #16
define zeroext i32 @lsr_i32(i32 %a) {
%1 = lshr i32 %a, 16
ret i32 %1
}
; Variable-amount lshr on i64: expects a register-register lsr of x0 by x1.
; CHECK-LABEL: lsrv_i64
; CHECK: lsr {{x[0-9]*}}, x0, x1
define i64 @lsrv_i64(i64 %a, i64 %b) {
%1 = lshr i64 %a, %b
ret i64 %1
}
; FIXME: This shouldn't use the variable shift version.
; CHECK-LABEL: lsr_i64
; CHECK: lsr {{x[0-9]*}}, {{x[0-9]*}}, {{x[0-9]*}}
; CHECK: lsr {{x[0-9]*}}, {{x[0-9]*}}, {{x[0-9]*}}
define i64 @lsr_i64(i64 %a) {
%1 = lshr i64 %a, 32
ret i64 %1
}
; Variable-amount ashr on i8: the value (w0) is expected to be sign-extended
; with sxtb and the shift amount (w1) masked with 0xff, followed by a 32-bit
; register-register asr and a final mask of the result with 0xff.
; CHECK-LABEL: asrv_i8
; CHECK: sxtb [[REG1:w[0-9]+]], w0
; CHECK-NEXT: and [[REG2:w[0-9]+]], w1, #0xff
; CHECK-NEXT: asr [[REG3:w[0-9]+]], [[REG1]], [[REG2]]
; CHECK-NEXT: and {{w[0-9]+}}, [[REG3]], #0xff
define zeroext i8 @asrv_i8(i8 %a, i8 %b) {
%1 = ashr i8 %a, %b
ret i8 %1
}
; CHECK-LABEL: asr_i8
; CHECK: sbfx {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
; CHECK: sbfx {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
define zeroext i8 @asr_i8(i8 %a) {
%1 = ashr i8 %a, 4
ret i8 %1
}
; Variable-amount ashr on i16: the value (w0) is expected to be sign-extended
; with sxth and the shift amount (w1) masked with 0xffff, followed by a 32-bit
; register-register asr and a final mask of the result with 0xffff.
; CHECK-LABEL: asrv_i16
; CHECK: sxth [[REG1:w[0-9]+]], w0
; CHECK-NEXT: and [[REG2:w[0-9]+]], w1, #0xffff
; CHECK-NEXT: asr [[REG3:w[0-9]+]], [[REG1]], [[REG2]]
; CHECK-NEXT: and {{w[0-9]+}}, [[REG3]], #0xffff
define zeroext i16 @asrv_i16(i16 %a, i16 %b) {
%1 = ashr i16 %a, %b
ret i16 %1
}
; CHECK-LABEL: asr_i16
; CHECK: sbfx {{w[0-9]*}}, {{w[0-9]*}}, #8, #8
; CHECK: sbfx {{w[0-9]*}}, {{w[0-9]*}}, #8, #8
define zeroext i16 @asr_i16(i16 %a) {
%1 = ashr i16 %a, 8
ret i16 %1
}
; Variable-amount ashr on i32: expects a register-register asr of w0 by w1.
; CHECK-LABEL: asrv_i32
; CHECK: asr {{w[0-9]*}}, w0, w1
define zeroext i32 @asrv_i32(i32 %a, i32 %b) {
%1 = ashr i32 %a, %b
ret i32 %1
}
; CHECK-LABEL: asr_i32
; CHECK: asr {{w[0-9]*}}, {{w[0-9]*}}, #16
; CHECK: asr {{w[0-9]*}}, {{w[0-9]*}}, #16
define zeroext i32 @asr_i32(i32 %a) {
%1 = ashr i32 %a, 16
ret i32 %1
}
; Variable-amount ashr on i64: expects a register-register asr of x0 by x1.
; CHECK-LABEL: asrv_i64
; CHECK: asr {{x[0-9]*}}, x0, x1
define i64 @asrv_i64(i64 %a, i64 %b) {
%1 = ashr i64 %a, %b
ret i64 %1
}
; FIXME: This shouldn't use the variable shift version.
; CHECK-LABEL: asr_i64
; CHECK: asr {{x[0-9]*}}, {{x[0-9]*}}, {{x[0-9]*}}
; CHECK: asr {{x[0-9]*}}, {{x[0-9]*}}, {{x[0-9]*}}
define i64 @asr_i64(i64 %a) {
%1 = ashr i64 %a, 32
ret i64 %1