forked from OSchip/llvm-project
[PowerPC] Handle __builtin_xxpermdi the same way as GCC does
The codegen for this builtin was initially implemented to match GCC. However, due to interest from users, GCC changed its behaviour to account for the big-endian bias of the instruction and correct it. This patch brings the handling in line with GCC. Fixes https://bugs.llvm.org/show_bug.cgi?id=38192 Differential Revision: https://reviews.llvm.org/D49424 llvm-svn: 337449
This commit is contained in:
parent
7fbf06c10b
commit
1ac56bd33f
|
@ -10831,19 +10831,11 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|||
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
|
||||
Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
|
||||
|
||||
// Element zero comes from the first input vector and element one comes from
|
||||
// the second. The element indices within each vector are numbered in big
|
||||
// endian order so the shuffle mask must be adjusted for this on little
|
||||
// endian platforms (i.e. index is complemented and source vector reversed).
|
||||
unsigned ElemIdx0;
|
||||
unsigned ElemIdx1;
|
||||
if (getTarget().isLittleEndian()) {
|
||||
ElemIdx0 = (~Index & 1) + 2;
|
||||
ElemIdx1 = (~Index & 2) >> 1;
|
||||
} else { // BigEndian
|
||||
ElemIdx0 = (Index & 2) >> 1;
|
||||
ElemIdx1 = 2 + (Index & 1);
|
||||
}
|
||||
// Account for endianness by treating this as just a shuffle. So we use the
|
||||
// same indices for both LE and BE in order to produce expected results in
|
||||
// both cases.
|
||||
unsigned ElemIdx0 = (Index & 2) >> 1;;
|
||||
unsigned ElemIdx1 = 2 + (Index & 1);;
|
||||
|
||||
Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
|
||||
ConstantInt::get(Int32Ty, ElemIdx1)};
|
||||
|
|
|
@ -1694,43 +1694,43 @@ vec_xst_be(vd, sll, ad);
|
|||
|
||||
res_vd = vec_xxpermdi(vd, vd, 0);
|
||||
// CHECK: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 0, i32 2>
|
||||
// CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 3, i32 1>
|
||||
// CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 0, i32 2>
|
||||
|
||||
res_vf = vec_xxpermdi(vf, vf, 1);
|
||||
// CHECK: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 0, i32 3>
|
||||
// CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 2, i32 1>
|
||||
// CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 0, i32 3>
|
||||
|
||||
res_vsll = vec_xxpermdi(vsll, vsll, 2);
|
||||
// CHECK: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 1, i32 2>
|
||||
// CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 3, i32 0>
|
||||
// CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 1, i32 2>
|
||||
|
||||
res_vull = vec_xxpermdi(vull, vull, 3);
|
||||
// CHECK: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 1, i32 3>
|
||||
// CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 2, i32 0>
|
||||
// CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 1, i32 3>
|
||||
|
||||
res_vsi = vec_xxpermdi(vsi, vsi, 0);
|
||||
// CHECK: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 0, i32 2>
|
||||
// CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 3, i32 1>
|
||||
// CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 0, i32 2>
|
||||
|
||||
res_vui = vec_xxpermdi(vui, vui, 1);
|
||||
// CHECK: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 0, i32 3>
|
||||
// CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 2, i32 1>
|
||||
// CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 0, i32 3>
|
||||
|
||||
res_vss = vec_xxpermdi(vss, vss, 2);
|
||||
// CHECK: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 1, i32 2>
|
||||
// CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 3, i32 0>
|
||||
// CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 1, i32 2>
|
||||
|
||||
res_vus = vec_xxpermdi(vus, vus, 3);
|
||||
// CHECK: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 1, i32 3>
|
||||
// CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 2, i32 0>
|
||||
// CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 1, i32 3>
|
||||
|
||||
res_vsc = vec_xxpermdi(vsc, vsc, 0);
|
||||
// CHECK: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 0, i32 2>
|
||||
// CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 3, i32 1>
|
||||
// CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 0, i32 2>
|
||||
|
||||
res_vuc = vec_xxpermdi(vuc, vuc, 1);
|
||||
// CHECK: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 0, i32 3>
|
||||
// CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 2, i32 1>
|
||||
// CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 0, i32 3>
|
||||
|
||||
res_vd = vec_xxsldwi(vd, vd, 0);
|
||||
// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
|
@ -1786,7 +1786,7 @@ vector int xxpermdi_should_not_assert(vector int a, vector int b) {
|
|||
|
||||
// CHECK-LE: bitcast <4 x i32> %{{[0-9]+}} to <2 x i64>
|
||||
// CHECK-LE-NEXT: bitcast <4 x i32> %{{[0-9]+}} to <2 x i64>
|
||||
// CHECK-LE-NEXT: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 3, i32 1>
|
||||
// CHECK-LE-NEXT: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 0, i32 2>
|
||||
// CHECK-LE-NEXT: bitcast <2 x i64> %{{[0-9]+}} to <4 x i32>
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue