diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 6d30d0c26e0a..859ad3ecd9bb 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5936,50 +5936,32 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     unsigned NumLanes = NumElts / 16;
     unsigned NumLaneElts = NumElts / NumLanes;
 
-    // If palignr is shifting the pair of input vectors less than the size of
-    // a lane, emit a shuffle instruction.
-    if (ShiftVal <= NumLaneElts) {
-      SmallVector<llvm::Constant*, 32> Indices;
-      // 256-bit palignr operates on 128-bit lanes so we need to handle that
-      for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
-        for (unsigned i = 0; i != NumLaneElts; ++i) {
-          unsigned Idx = ShiftVal + i;
-          if (Idx >= NumLaneElts)
-            Idx += NumElts - NumLaneElts; // End of lane, switch operand.
-          Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l));
-        }
-      }
-
-      Value* SV = llvm::ConstantVector::get(Indices);
-      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
-    }
-
     // If palignr is shifting the pair of vectors more than the size of two
     // lanes, emit zero.
     if (ShiftVal >= (2 * NumLaneElts))
       return llvm::Constant::getNullValue(ConvertType(E->getType()));
 
     // If palignr is shifting the pair of input vectors more than one lane,
-    // but less than two lanes, emit a shift.
-    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, NumElts/8);
-
-    Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
-    Ops[1] = llvm::ConstantInt::get(Int32Ty, (ShiftVal-NumLaneElts) * 8);
-
-    Intrinsic::ID ID;
-    switch (BuiltinID) {
-    default: llvm_unreachable("Unsupported intrinsic!");
-    case X86::BI__builtin_ia32_palignr128:
-      ID = Intrinsic::x86_sse2_psrl_dq;
-      break;
-    case X86::BI__builtin_ia32_palignr256:
-      ID = Intrinsic::x86_avx2_psrl_dq;
-      break;
+    // but less than two lanes, convert to shifting in zeroes.
+    if (ShiftVal > NumLaneElts) {
+      ShiftVal -= NumLaneElts;
+      Ops[1] = Ops[0]; // Only the first input still contributes.
+      Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
     }
 
-    // create i32 constant
-    llvm::Function *F = CGM.getIntrinsic(ID);
-    return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr");
+    SmallVector<llvm::Constant*, 32> Indices;
+    // 256-bit palignr operates on 128-bit lanes so we need to handle that
+    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+      for (unsigned i = 0; i != NumLaneElts; ++i) {
+        unsigned Idx = ShiftVal + i;
+        if (Idx >= NumLaneElts)
+          Idx += NumElts - NumLaneElts; // End of lane, switch operand.
+        Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l));
+      }
+    }
+
+    Value* SV = llvm::ConstantVector::get(Indices);
+    return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
   }
   case X86::BI__builtin_ia32_pslldqi256: {
     // Shift value is in bits so divide by 8.
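Note on the new lowering, with a minimal standalone sketch of the mask computation (the helper name palignrShuffle and the printing driver are inventions for this note, not clang API): a shift of two lanes or more leaves nothing, so the builtin folds to zero; a shift between one and two lanes drops the low input entirely, so the remaining input is shifted against a zero vector; everything else is the pre-existing shuffle path. Within each 128-bit lane, a mask index that runs past the lane boundary is offset by NumElts - NumLaneElts so it selects from the second shuffle operand.

    #include <cstdio>
    #include <vector>

    // Sketch of the palignr mask construction above: given the vector
    // width in bytes (16 for SSE, 32 for AVX2) and the immediate shift,
    // produce the shufflevector mask, or an empty mask when the result
    // folds to all zeros. Hypothetical helper, mirrors the clang logic.
    static std::vector<unsigned> palignrShuffle(unsigned NumElts,
                                                unsigned ShiftVal) {
      unsigned NumLanes = NumElts / 16;
      unsigned NumLaneElts = NumElts / NumLanes;

      std::vector<unsigned> Indices;
      // Shifting two or more lanes leaves nothing: all-zero result.
      if (ShiftVal >= 2 * NumLaneElts)
        return Indices;

      // Between one and two lanes: the low input drops out, so the
      // remaining input is shifted against a zero second operand.
      if (ShiftVal > NumLaneElts)
        ShiftVal -= NumLaneElts;

      for (unsigned l = 0; l != NumElts; l += NumLaneElts)
        for (unsigned i = 0; i != NumLaneElts; ++i) {
          unsigned Idx = ShiftVal + i;
          if (Idx >= NumLaneElts)
            Idx += NumElts - NumLaneElts; // past lane end: second operand
          Indices.push_back(Idx + l);
        }
      return Indices;
    }

    int main() {
      // _mm256_alignr_epi8(a, b, 17): expect 1..15, 32 then 17..31, 48,
      // i.e. each lane shifted down one byte with a zero filled in.
      for (unsigned Idx : palignrShuffle(32, 17))
        std::printf("%u ", Idx);
      std::printf("\n");
    }

For _mm256_alignr_epi8(a, b, 17) this prints 1..15, 32 followed by 17..31, 48, which is exactly the mask in the updated avx2 test below.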
diff --git a/clang/test/CodeGen/avx2-builtins.c b/clang/test/CodeGen/avx2-builtins.c
index a30f590ad169..69781d51d6c5 100644
--- a/clang/test/CodeGen/avx2-builtins.c
+++ b/clang/test/CodeGen/avx2-builtins.c
@@ -96,7 +96,7 @@ __m256i test_mm256_alignr_epi8(__m256i a, __m256i b) {
 }
 
 __m256i test2_mm256_alignr_epi8(__m256i a, __m256i b) {
-  // CHECK: @llvm.x86.avx2.psrl.dq({{.*}}, i32 8)
+  // CHECK: shufflevector <32 x i8> %5, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
   return _mm256_alignr_epi8(a, b, 17);
 }
 
diff --git a/clang/test/CodeGen/sse-builtins.c b/clang/test/CodeGen/sse-builtins.c
index 267deead700a..2893e611b193 100644
--- a/clang/test/CodeGen/sse-builtins.c
+++ b/clang/test/CodeGen/sse-builtins.c
@@ -574,6 +574,6 @@ __m128i test_mm_alignr_epi8(__m128i a, __m128i b) {
 }
 
 __m128i test2_mm_alignr_epi8(__m128i a, __m128i b) {
-  // CHECK: @llvm.x86.sse2.psrl.dq({{.*}}, i32 8)
+  // CHECK: shufflevector <16 x i8> %5, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
   return _mm_alignr_epi8(a, b, 17);
 }
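As a cross-check of the updated expectations, a scalar model of one 128-bit lane of PALIGNR (the helper palignrLane is hypothetical, not part of the test suite): result byte i is byte shift + i of the 32-byte concatenation hi:lo, reading as zero once the index runs past both inputs.

    #include <array>
    #include <cstdio>

    // Scalar model of one 128-bit lane of PALIGNR: result byte i is
    // byte (shift + i) of the 32-byte concatenation hi:lo, or zero
    // when the index runs past both inputs.
    static std::array<unsigned char, 16>
    palignrLane(const std::array<unsigned char, 16> &lo,
                const std::array<unsigned char, 16> &hi, unsigned shift) {
      std::array<unsigned char, 16> r{};
      for (unsigned i = 0; i != 16; ++i) {
        unsigned idx = shift + i;
        if (idx < 16)
          r[i] = lo[idx];
        else if (idx < 32)
          r[i] = hi[idx - 16];
        // else: shifted past both inputs; byte stays zero
      }
      return r;
    }

    int main() {
      std::array<unsigned char, 16> a, b;
      for (unsigned i = 0; i != 16; ++i) {
        a[i] = 0xA0 + i; // recognizable byte patterns
        b[i] = 0xB0 + i;
      }
      // _mm_alignr_epi8(a, b, 17) concatenates a:b with a in the high
      // half, so lo = b and hi = a; shift 17 yields a1 a2 ... af 00.
      for (unsigned char c : palignrLane(b, a, 17))
        std::printf("%02x ", c);
      std::printf("\n");
    }

With shift 17 every surviving byte comes from the first argument, which is why the shufflevector form pairs that operand with zeroinitializer rather than keeping the second argument live.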