diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index f5a26d012de9..7b3697be0ae0 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -58,8 +58,9 @@ static bool foldExtractCmp(Instruction &I, const TargetTransformInfo &TTI) { // ((2 * extract) + scalar cmp) < (vector cmp + extract) ? int ExtractCost = TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, C->getZExtValue()); - int ScalarCmpCost = TTI.getOperationCost(CmpOpcode, ScalarTy); - int VecCmpCost = TTI.getOperationCost(CmpOpcode, VecTy); + int ScalarCmpCost = TTI.getCmpSelInstrCost(CmpOpcode, ScalarTy, I.getType()); + int VecCmpCost = TTI.getCmpSelInstrCost(CmpOpcode, VecTy, + CmpInst::makeCmpResultType(VecTy)); int ScalarCost = 2 * ExtractCost + ScalarCmpCost; int VecCost = VecCmpCost + ExtractCost + diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll b/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll index d5d11df0ece0..8d04af3c8105 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- | FileCheck %s +; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX define i1 @cmp_v4i32(<4 x float> %arg, <4 x float> %arg1) { ; CHECK-LABEL: @cmp_v4i32( @@ -57,18 +58,32 @@ bb18: } define i32 @cmp_v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z) { -; CHECK-LABEL: @cmp_v2f64( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <2 x double> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 -; CHECK-NEXT: br i1 [[TMP1]], label [[T:%.*]], label [[F:%.*]] -; CHECK: t: -; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <2 x double> [[Y]], [[Z:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 -; CHECK-NEXT: [[E:%.*]] = select i1 [[TMP3]], i32 42, i32 99 -; CHECK-NEXT: ret i32 [[E]] -; CHECK: f: -; CHECK-NEXT: ret i32 0 +; SSE-LABEL: @cmp_v2f64( +; SSE-NEXT: entry: +; SSE-NEXT: [[X1:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1 +; SSE-NEXT: [[Y1:%.*]] = extractelement <2 x double> [[Y:%.*]], i32 1 +; SSE-NEXT: [[CMP1:%.*]] = fcmp oeq double [[X1]], [[Y1]] +; SSE-NEXT: br i1 [[CMP1]], label [[T:%.*]], label [[F:%.*]] +; SSE: t: +; SSE-NEXT: [[Z1:%.*]] = extractelement <2 x double> [[Z:%.*]], i32 1 +; SSE-NEXT: [[CMP2:%.*]] = fcmp ogt double [[Y1]], [[Z1]] +; SSE-NEXT: [[E:%.*]] = select i1 [[CMP2]], i32 42, i32 99 +; SSE-NEXT: ret i32 [[E]] +; SSE: f: +; SSE-NEXT: ret i32 0 +; +; AVX-LABEL: @cmp_v2f64( +; AVX-NEXT: entry: +; AVX-NEXT: [[TMP0:%.*]] = fcmp oeq <2 x double> [[X:%.*]], [[Y:%.*]] +; AVX-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 +; AVX-NEXT: br i1 [[TMP1]], label [[T:%.*]], label [[F:%.*]] +; AVX: t: +; AVX-NEXT: [[TMP2:%.*]] = fcmp ogt <2 x double> [[Y]], [[Z:%.*]] +; AVX-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 +; AVX-NEXT: [[E:%.*]] = select i1 [[TMP3]], i32 42, i32 99 +; AVX-NEXT: ret i32 [[E]] +; AVX: f: +; AVX-NEXT: ret i32 0 ; entry: %x1 = extractelement <2 x double> %x, i32 1