[AArch64] Add support for NEON scalar shift immediate instructions.

llvm-svn: 193791
This commit is contained in:
Chad Rosier 2013-10-31 19:29:05 +00:00
parent 20e1f20d69
commit bdca387884
4 changed files with 435 additions and 2 deletions

View File

@ -106,6 +106,7 @@ class Inst <string n, string p, string t, Op o> {
string Types = t;
Op Operand = o;
bit isShift = 0;
bit isScalarShift = 0;
bit isVCVT_N = 0;
bit isA64 = 0;
@ -784,6 +785,41 @@ def SCALAR_QRSHL: SInst<"vqrshl", "sss", "ScSsSiSlSUcSUsSUiSUl">;
// Scalar Shift Rounding Left
def SCALAR_RSHL: SInst<"vrshl", "sss", "SlSUl">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Shift (Immediate)
// NOTE(review): the prototype strings appear to encode result/operand kinds —
// "ssi" scalar-op with immediate, "sssi" adds an accumulator operand, "zsi" a
// narrowed result — and the type lists ("SlSUl", ...) select the scalar element
// types; confirm against the prototype-modifier legend in NeonEmitter.
let isScalarShift = 1 in {
// Signed/Unsigned Shift Right (Immediate)
def SCALAR_SSHR_N: SInst<"vshr_n", "ssi", "SlSUl">;
// Signed/Unsigned Rounding Shift Right (Immediate)
def SCALAR_SRSHR_N: SInst<"vrshr_n", "ssi", "SlSUl">;
// Signed/Unsigned Shift Right and Accumulate (Immediate)
def SCALAR_SSRA_N: SInst<"vsra_n", "sssi", "SlSUl">;
// Signed/Unsigned Rounding Shift Right and Accumulate (Immediate)
def SCALAR_SRSRA_N: SInst<"vrsra_n", "sssi", "SlSUl">;
// Shift Left (Immediate)
def SCALAR_SHL_N: SInst<"vshl_n", "ssi", "SlSUl">;
// Signed/Unsigned Saturating Shift Left (Immediate)
def SCALAR_SQSHL_N: SInst<"vqshl_n", "ssi", "ScSsSiSlSUcSUsSUiSUl">;
// Signed Saturating Shift Left Unsigned (Immediate)
def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "ssi", "ScSsSiSl">;
// Shift Right And Insert (Immediate)
def SCALAR_SRI_N: SInst<"vsri_n", "ssi", "SlSUl">;
// Shift Left And Insert (Immediate)
def SCALAR_SLI_N: SInst<"vsli_n", "ssi", "SlSUl">;
// Signed/Unsigned Saturating Shift Right Narrow (Immediate)
def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "zsi", "SsSiSlSUsSUiSUl">;
// Signed/Unsigned Saturating Rounded Shift Right Narrow (Immediate)
def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "zsi", "SsSiSlSUsSUiSUl">;
// Signed Saturating Shift Right Unsigned Narrow (Immediate)
def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "zsi", "SsSiSl">;
// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "zsi", "SsSiSl">;
}
////////////////////////////////////////////////////////////////////////////////
// Scalar Reduce Pairwise Addition (Scalar and Floating Point)
def SCALAR_ADDP : SInst<"vpadd", "sd", "SfSHlSHd">;

View File

@ -2227,6 +2227,110 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
case AArch64::BI__builtin_neon_vqmovnd_u64:
Int = Intrinsic::arm_neon_vqmovnu;
s = "vqmovn"; OverloadNarrowInt = true; break;
// Scalar shift-by-immediate builtins. Each case selects the target intrinsic
// and a name string. When OverloadInt is true the intrinsic is overloaded on a
// <1 x Ty> vector type derived from the call's return type further down; when
// false the intrinsic is monomorphic (64-bit "d" forms only).
// Scalar Signed Shift Right (Immediate)
case AArch64::BI__builtin_neon_vshrd_n_s64:
  Int = Intrinsic::aarch64_neon_vshrds_n;
  s = "vsshr"; OverloadInt = false; break;
// Scalar Unsigned Shift Right (Immediate)
case AArch64::BI__builtin_neon_vshrd_n_u64:
  Int = Intrinsic::aarch64_neon_vshrdu_n;
  s = "vushr"; OverloadInt = false; break;
// Scalar Signed Rounding Shift Right (Immediate)
case AArch64::BI__builtin_neon_vrshrd_n_s64:
  Int = Intrinsic::aarch64_neon_vrshrds_n;
  s = "vsrshr"; OverloadInt = false; break;
// Scalar Unsigned Rounding Shift Right (Immediate)
case AArch64::BI__builtin_neon_vrshrd_n_u64:
  Int = Intrinsic::aarch64_neon_vrshrdu_n;
  s = "vurshr"; OverloadInt = false; break;
// Scalar Signed Shift Right and Accumulate (Immediate)
case AArch64::BI__builtin_neon_vsrad_n_s64:
  Int = Intrinsic::aarch64_neon_vsrads_n;
  s = "vssra"; OverloadInt = false; break;
// Scalar Unsigned Shift Right and Accumulate (Immediate)
case AArch64::BI__builtin_neon_vsrad_n_u64:
  Int = Intrinsic::aarch64_neon_vsradu_n;
  s = "vusra"; OverloadInt = false; break;
// Scalar Signed Rounding Shift Right and Accumulate (Immediate)
case AArch64::BI__builtin_neon_vrsrad_n_s64:
  Int = Intrinsic::aarch64_neon_vrsrads_n;
  s = "vsrsra"; OverloadInt = false; break;
// Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
case AArch64::BI__builtin_neon_vrsrad_n_u64:
  Int = Intrinsic::aarch64_neon_vrsradu_n;
  s = "vursra"; OverloadInt = false; break;
// Scalar Signed/Unsigned Shift Left (Immediate)
// Plain left shift is sign-agnostic, so both builtins share one intrinsic.
case AArch64::BI__builtin_neon_vshld_n_s64:
case AArch64::BI__builtin_neon_vshld_n_u64:
  Int = Intrinsic::aarch64_neon_vshld_n;
  s = "vshl"; OverloadInt = false; break;
// Signed Saturating Shift Left (Immediate)
case AArch64::BI__builtin_neon_vqshlb_n_s8:
case AArch64::BI__builtin_neon_vqshlh_n_s16:
case AArch64::BI__builtin_neon_vqshls_n_s32:
case AArch64::BI__builtin_neon_vqshld_n_s64:
  Int = Intrinsic::aarch64_neon_vqshls_n;
  s = "vsqshl"; OverloadInt = true; break;
// Unsigned Saturating Shift Left (Immediate)
case AArch64::BI__builtin_neon_vqshlb_n_u8:
case AArch64::BI__builtin_neon_vqshlh_n_u16:
case AArch64::BI__builtin_neon_vqshls_n_u32:
case AArch64::BI__builtin_neon_vqshld_n_u64:
  Int = Intrinsic::aarch64_neon_vqshlu_n;
  s = "vuqshl"; OverloadInt = true; break;
// Signed Saturating Shift Left Unsigned (Immediate)
case AArch64::BI__builtin_neon_vqshlub_n_s8:
case AArch64::BI__builtin_neon_vqshluh_n_s16:
case AArch64::BI__builtin_neon_vqshlus_n_s32:
case AArch64::BI__builtin_neon_vqshlud_n_s64:
  Int = Intrinsic::aarch64_neon_vqshlus_n;
  s = "vsqshlu"; OverloadInt = true; break;
// Shift Right And Insert (Immediate)
// Insert shifts are sign-agnostic as well: one intrinsic per direction.
case AArch64::BI__builtin_neon_vsrid_n_s64:
case AArch64::BI__builtin_neon_vsrid_n_u64:
  Int = Intrinsic::aarch64_neon_vsrid_n;
  s = "vsri"; OverloadInt = false; break;
// Shift Left And Insert (Immediate)
case AArch64::BI__builtin_neon_vslid_n_s64:
case AArch64::BI__builtin_neon_vslid_n_u64:
  Int = Intrinsic::aarch64_neon_vslid_n;
  s = "vsli"; OverloadInt = false; break;
// Signed Saturating Shift Right Narrow (Immediate)
case AArch64::BI__builtin_neon_vqshrnh_n_s16:
case AArch64::BI__builtin_neon_vqshrns_n_s32:
case AArch64::BI__builtin_neon_vqshrnd_n_s64:
  Int = Intrinsic::aarch64_neon_vsqshrn;
  s = "vsqshrn"; OverloadInt = true; break;
// Unsigned Saturating Shift Right Narrow (Immediate)
case AArch64::BI__builtin_neon_vqshrnh_n_u16:
case AArch64::BI__builtin_neon_vqshrns_n_u32:
case AArch64::BI__builtin_neon_vqshrnd_n_u64:
  Int = Intrinsic::aarch64_neon_vuqshrn;
  s = "vuqshrn"; OverloadInt = true; break;
// Signed Saturating Rounded Shift Right Narrow (Immediate)
case AArch64::BI__builtin_neon_vqrshrnh_n_s16:
case AArch64::BI__builtin_neon_vqrshrns_n_s32:
case AArch64::BI__builtin_neon_vqrshrnd_n_s64:
  Int = Intrinsic::aarch64_neon_vsqrshrn;
  s = "vsqrshrn"; OverloadInt = true; break;
// Unsigned Saturating Rounded Shift Right Narrow (Immediate)
case AArch64::BI__builtin_neon_vqrshrnh_n_u16:
case AArch64::BI__builtin_neon_vqrshrns_n_u32:
case AArch64::BI__builtin_neon_vqrshrnd_n_u64:
  Int = Intrinsic::aarch64_neon_vuqrshrn;
  s = "vuqrshrn"; OverloadInt = true; break;
// Signed Saturating Shift Right Unsigned Narrow (Immediate)
case AArch64::BI__builtin_neon_vqshrunh_n_s16:
case AArch64::BI__builtin_neon_vqshruns_n_s32:
case AArch64::BI__builtin_neon_vqshrund_n_s64:
  Int = Intrinsic::aarch64_neon_vsqshrun;
  s = "vsqshrun"; OverloadInt = true; break;
// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
case AArch64::BI__builtin_neon_vqrshrunh_n_s16:
case AArch64::BI__builtin_neon_vqrshruns_n_s32:
case AArch64::BI__builtin_neon_vqrshrund_n_s64:
  Int = Intrinsic::aarch64_neon_vsqrshrun;
  s = "vsqrshrun"; OverloadInt = true; break;
}
if (!Int)
@ -2254,8 +2358,7 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
assert(E->getNumArgs() == 1);
} else if (OverloadInt) {
// Determine the type of this overloaded AArch64 intrinsic
const Expr *Arg = E->getArg(E->getNumArgs()-1);
llvm::Type *Ty = CGF.ConvertType(Arg->getType());
llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1);
assert(VTy);

View File

@ -7484,3 +7484,266 @@ uint64_t test_vcaltd_f64(float64_t a, float64_t b) {
return (uint64_t)vcaltd_f64(a, b);
}
// Scalar shift-by-immediate CodeGen tests. The CHECK lines are FileCheck
// assertions pinning the exact AArch64 instruction and immediate emitted for
// each intrinsic; boundary immediates (#64 for ushr, #0 for shl) exercise the
// limits of the Sema range check (right shifts take 1..bits, left 0..bits-1).
int64_t test_vshrd_n_s64(int64_t a) {
// CHECK-LABEL: test_vshrd_n_s64
// CHECK: sshr {{d[0-9]+}}, {{d[0-9]+}}, #1
return (int64_t)vshrd_n_s64(a, 1);
}
// #64 is the maximum right-shift immediate for a 64-bit element.
uint64_t test_vshrd_n_u64(uint64_t a) {
// CHECK-LABEL: test_vshrd_n_u64
// CHECK: ushr {{d[0-9]+}}, {{d[0-9]+}}, #64
return (uint64_t)vshrd_n_u64(a, 64);
}
int64_t test_vrshrd_n_s64(int64_t a) {
// CHECK-LABEL: test_vrshrd_n_s64
// CHECK: srshr {{d[0-9]+}}, {{d[0-9]+}}, #63
return (int64_t)vrshrd_n_s64(a, 63);
}
uint64_t test_vrshrd_n_u64(uint64_t a) {
// CHECK-LABEL: test_vrshrd_n_u64
// CHECK: urshr {{d[0-9]+}}, {{d[0-9]+}}, #63
return (uint64_t)vrshrd_n_u64(a, 63);
}
int64_t test_vsrad_n_s64(int64_t a, int64_t b) {
// CHECK-LABEL: test_vsrad_n_s64
// CHECK: ssra {{d[0-9]+}}, {{d[0-9]+}}, #63
return (int64_t)vsrad_n_s64(a, b, 63);
}
uint64_t test_vsrad_n_u64(uint64_t a, uint64_t b) {
// CHECK-LABEL: test_vsrad_n_u64
// CHECK: usra {{d[0-9]+}}, {{d[0-9]+}}, #63
return (uint64_t)vsrad_n_u64(a, b, 63);
}
int64_t test_vrsrad_n_s64(int64_t a, int64_t b) {
// CHECK-LABEL: test_vrsrad_n_s64
// CHECK: srsra {{d[0-9]+}}, {{d[0-9]+}}, #63
return (int64_t)vrsrad_n_s64(a, b, 63);
}
uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) {
// CHECK-LABEL: test_vrsrad_n_u64
// CHECK: ursra {{d[0-9]+}}, {{d[0-9]+}}, #63
return (uint64_t)vrsrad_n_u64(a, b, 63);
}
// #0 is the minimum left-shift immediate.
int64_t test_vshld_n_s64(int64_t a) {
// CHECK-LABEL: test_vshld_n_s64
// CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #0
return (int64_t)vshld_n_s64(a, 0);
}
uint64_t test_vshld_n_u64(uint64_t a) {
// CHECK-LABEL: test_vshld_n_u64
// CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #63
return (uint64_t)vshld_n_u64(a, 63);
}
// Saturating shift-left tests, one per element size (b/h/s/d), each using the
// maximum left-shift immediate for that width (7/15/31/63).
int8_t test_vqshlb_n_s8(int8_t a) {
// CHECK-LABEL: test_vqshlb_n_s8
// CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, #7
return (int8_t)vqshlb_n_s8(a, 7);
}
int16_t test_vqshlh_n_s16(int16_t a) {
// CHECK-LABEL: test_vqshlh_n_s16
// CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, #15
return (int16_t)vqshlh_n_s16(a, 15);
}
int32_t test_vqshls_n_s32(int32_t a) {
// CHECK-LABEL: test_vqshls_n_s32
// CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, #31
return (int32_t)vqshls_n_s32(a, 31);
}
int64_t test_vqshld_n_s64(int64_t a) {
// CHECK-LABEL: test_vqshld_n_s64
// CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, #63
return (int64_t)vqshld_n_s64(a, 63);
}
uint8_t test_vqshlb_n_u8(uint8_t a) {
// CHECK-LABEL: test_vqshlb_n_u8
// CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, #7
return (uint8_t)vqshlb_n_u8(a, 7);
}
uint16_t test_vqshlh_n_u16(uint16_t a) {
// CHECK-LABEL: test_vqshlh_n_u16
// CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, #15
return (uint16_t)vqshlh_n_u16(a, 15);
}
uint32_t test_vqshls_n_u32(uint32_t a) {
// CHECK-LABEL: test_vqshls_n_u32
// CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, #31
return (uint32_t)vqshls_n_u32(a, 31);
}
uint64_t test_vqshld_n_u64(uint64_t a) {
// CHECK-LABEL: test_vqshld_n_u64
// CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, #63
return (uint64_t)vqshld_n_u64(a, 63);
}
// Signed-input/unsigned-result saturating shift left (sqshlu).
int8_t test_vqshlub_n_s8(int8_t a) {
// CHECK-LABEL: test_vqshlub_n_s8
// CHECK: sqshlu {{b[0-9]+}}, {{b[0-9]+}}, #7
return (int8_t)vqshlub_n_s8(a, 7);
}
int16_t test_vqshluh_n_s16(int16_t a) {
// CHECK-LABEL: test_vqshluh_n_s16
// CHECK: sqshlu {{h[0-9]+}}, {{h[0-9]+}}, #15
return (int16_t)vqshluh_n_s16(a, 15);
}
int32_t test_vqshlus_n_s32(int32_t a) {
// CHECK-LABEL: test_vqshlus_n_s32
// CHECK: sqshlu {{s[0-9]+}}, {{s[0-9]+}}, #31
return (int32_t)vqshlus_n_s32(a, 31);
}
int64_t test_vqshlud_n_s64(int64_t a) {
// CHECK-LABEL: test_vqshlud_n_s64
// CHECK: sqshlu {{d[0-9]+}}, {{d[0-9]+}}, #63
return (int64_t)vqshlud_n_s64(a, 63);
}
// Shift-and-insert tests: sri/sli emit the same instruction for signed and
// unsigned variants (the operation is sign-agnostic).
int64_t test_vsrid_n_s64(int64_t a) {
// CHECK-LABEL: test_vsrid_n_s64
// CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63
return (int64_t)vsrid_n_s64(a, 63);
}
uint64_t test_vsrid_n_u64(uint64_t a) {
// CHECK-LABEL: test_vsrid_n_u64
// CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63
return (uint64_t)vsrid_n_u64(a, 63);
}
int64_t test_vslid_n_s64(int64_t a) {
// CHECK-LABEL: test_vslid_n_s64
// CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63
return (int64_t)vslid_n_s64(a, 63);
}
uint64_t test_vslid_n_u64(uint64_t a) {
// CHECK-LABEL: test_vslid_n_u64
// CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63
return (uint64_t)vslid_n_u64(a, 63);
}
// Narrowing shift-right tests: the result register class is one step narrower
// than the source (b from h, h from s, s from d), and the immediate range is
// governed by the source width (max #15/#31/#63).
int8_t test_vqshrnh_n_s16(int16_t a) {
// CHECK-LABEL: test_vqshrnh_n_s16
// CHECK: sqshrn {{b[0-9]+}}, {{h[0-9]+}}, #15
return (int8_t)vqshrnh_n_s16(a, 15);
}
int16_t test_vqshrns_n_s32(int32_t a) {
// CHECK-LABEL: test_vqshrns_n_s32
// CHECK: sqshrn {{h[0-9]+}}, {{s[0-9]+}}, #31
return (int16_t)vqshrns_n_s32(a, 31);
}
int32_t test_vqshrnd_n_s64(int64_t a) {
// CHECK-LABEL: test_vqshrnd_n_s64
// CHECK: sqshrn {{s[0-9]+}}, {{d[0-9]+}}, #63
return (int32_t)vqshrnd_n_s64(a, 63);
}
uint8_t test_vqshrnh_n_u16(uint16_t a) {
// CHECK-LABEL: test_vqshrnh_n_u16
// CHECK: uqshrn {{b[0-9]+}}, {{h[0-9]+}}, #15
return (uint8_t)vqshrnh_n_u16(a, 15);
}
uint16_t test_vqshrns_n_u32(uint32_t a) {
// CHECK-LABEL: test_vqshrns_n_u32
// CHECK: uqshrn {{h[0-9]+}}, {{s[0-9]+}}, #31
return (uint16_t)vqshrns_n_u32(a, 31);
}
uint32_t test_vqshrnd_n_u64(uint64_t a) {
// CHECK-LABEL: test_vqshrnd_n_u64
// CHECK: uqshrn {{s[0-9]+}}, {{d[0-9]+}}, #63
return (uint32_t)vqshrnd_n_u64(a, 63);
}
int8_t test_vqrshrnh_n_s16(int16_t a) {
// CHECK-LABEL: test_vqrshrnh_n_s16
// CHECK: sqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #15
return (int8_t)vqrshrnh_n_s16(a, 15);
}
int16_t test_vqrshrns_n_s32(int32_t a) {
// CHECK-LABEL: test_vqrshrns_n_s32
// CHECK: sqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #31
return (int16_t)vqrshrns_n_s32(a, 31);
}
int32_t test_vqrshrnd_n_s64(int64_t a) {
// CHECK-LABEL: test_vqrshrnd_n_s64
// CHECK: sqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #63
return (int32_t)vqrshrnd_n_s64(a, 63);
}
uint8_t test_vqrshrnh_n_u16(uint16_t a) {
// CHECK-LABEL: test_vqrshrnh_n_u16
// CHECK: uqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #15
return (uint8_t)vqrshrnh_n_u16(a, 15);
}
uint16_t test_vqrshrns_n_u32(uint32_t a) {
// CHECK-LABEL: test_vqrshrns_n_u32
// CHECK: uqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #31
return (uint16_t)vqrshrns_n_u32(a, 31);
}
uint32_t test_vqrshrnd_n_u64(uint64_t a) {
// CHECK-LABEL: test_vqrshrnd_n_u64
// CHECK: uqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #63
return (uint32_t)vqrshrnd_n_u64(a, 63);
}
// Signed-input/unsigned-result narrowing shifts (sqshrun/sqrshrun).
int8_t test_vqshrunh_n_s16(int16_t a) {
// CHECK-LABEL: test_vqshrunh_n_s16
// CHECK: sqshrun {{b[0-9]+}}, {{h[0-9]+}}, #15
return (int8_t)vqshrunh_n_s16(a, 15);
}
int16_t test_vqshruns_n_s32(int32_t a) {
// CHECK-LABEL: test_vqshruns_n_s32
// CHECK: sqshrun {{h[0-9]+}}, {{s[0-9]+}}, #31
return (int16_t)vqshruns_n_s32(a, 31);
}
int32_t test_vqshrund_n_s64(int64_t a) {
// CHECK-LABEL: test_vqshrund_n_s64
// CHECK: sqshrun {{s[0-9]+}}, {{d[0-9]+}}, #63
return (int32_t)vqshrund_n_s64(a, 63);
}
int8_t test_vqrshrunh_n_s16(int16_t a) {
// CHECK-LABEL: test_vqrshrunh_n_s16
// CHECK: sqrshrun {{b[0-9]+}}, {{h[0-9]+}}, #15
return (int8_t)vqrshrunh_n_s16(a, 15);
}
int16_t test_vqrshruns_n_s32(int32_t a) {
// CHECK-LABEL: test_vqrshruns_n_s32
// CHECK: sqrshrun {{h[0-9]+}}, {{s[0-9]+}}, #31
return (int16_t)vqrshruns_n_s32(a, 31);
}
int32_t test_vqrshrund_n_s64(int64_t a) {
// CHECK-LABEL: test_vqrshrund_n_s64
// CHECK: sqrshrun {{s[0-9]+}}, {{d[0-9]+}}, #63
return (int32_t)vqrshrund_n_s64(a, 63);
}

View File

@ -2383,6 +2383,29 @@ static unsigned RangeFromType(const char mod, StringRef typestr) {
}
}
/// Return the largest left-shift immediate (element bit width minus one) for
/// the scalar type obtained by applying prototype modifier \p mod to
/// \p typestr. The caller composes this with a lower bound of 1 for right
/// shifts, whose immediates run 1..bits.
static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) {
  // Resolve the base element type, then apply the prototype modifier so that
  // e.g. narrowing prototypes are ranged on the wide source operand.
  bool ignored = false;
  char elt = ClassifyType(typestr, ignored, ignored, ignored);
  elt = ModType(mod, elt, ignored, ignored, ignored, ignored, ignored, ignored);

  if (elt == 'c')
    return 7;   // 8-bit element
  if (elt == 'h' || elt == 's')
    return 15;  // 16-bit element
  if (elt == 'f' || elt == 'i')
    return 31;  // 32-bit element
  if (elt == 'd' || elt == 'l')
    return 63;  // 64-bit element
  PrintFatalError("unhandled type!");
}
/// Generate the ARM and AArch64 intrinsic range checking code for
/// shift/lane immediates, checking for unique declarations.
void
@ -2456,6 +2479,14 @@ NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
else
PrintFatalError(R->getLoc(),
"Fixed point convert name should contains \"32\" or \"64\"");
} else if (R->getValueAsBit("isScalarShift")) {
// Right shifts have an 'r' in the name, left shifts do not.
if (name.find('r') != std::string::npos)
rangestr = "l = 1; ";
rangestr += "u = " +
utostr(RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]));
} else if (!ProtoHasScalar(Proto)) {
// Builtins which are overloaded by type will need to have their upper
// bound computed at Sema time based on the type constant.