Add a stub for the x86 cost model implementation. Implement a basic cost rule for inserting elements into and extracting elements from XMM registers.

llvm-svn: 167333
This commit is contained in:
Nadav Rotem 2012-11-02 23:27:16 +00:00
parent 263f6a0ffa
commit 23848f8f1d
3 changed files with 51 additions and 2 deletions

View File

@ -19,6 +19,7 @@
#include "X86RegisterInfo.h"
#include "X86MachineFunctionInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetTransformImpl.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/SelectionDAG.h"
@ -946,6 +947,21 @@ namespace llvm {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
}
/// X86 specialization of the vector cost model. It overrides the cost of
/// single-element vector accesses and defers to VectorTargetTransformImpl
/// for every case it does not special-case.
class X86VectorTargetTransformInfo : public VectorTargetTransformImpl {
public:
  /// Forwards \p TL unchanged to the VectorTargetTransformImpl constructor.
  explicit X86VectorTargetTransformInfo(const TargetLowering *TL)
      : VectorTargetTransformImpl(TL) {}

  /// Returns the cost of the vector instruction \p Opcode accessing element
  /// \p Index of a value of type \p Val.
  ///
  /// The result is 0 when the scalar type of \p Val is floating point and
  /// \p Index is 0; in all other cases the base class cost is returned.
  virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
                                      unsigned Index) const {
    const bool IsFPElement = Val->getScalarType()->isFloatingPointTy();

    // Anything other than a floating point element at position #0 is priced
    // by the generic implementation.
    if (!IsFPElement || Index != 0)
      return VectorTargetTransformImpl::getVectorInstrCost(Opcode, Val, Index);

    // A floating point scalar is already located at index #0, so accessing
    // that element is free.
    return 0;
  }
};
}
#endif // X86ISELLOWERING_H

View File

@ -82,7 +82,7 @@ class X86_32TargetMachine : public X86TargetMachine {
X86TargetLowering TLInfo;
X86JITInfo JITInfo;
ScalarTargetTransformImpl STTI;
VectorTargetTransformImpl VTTI;
X86VectorTargetTransformInfo VTTI;
public:
X86_32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@ -119,7 +119,7 @@ class X86_64TargetMachine : public X86TargetMachine {
X86TargetLowering TLInfo;
X86JITInfo JITInfo;
ScalarTargetTransformImpl STTI;
VectorTargetTransformImpl VTTI;
X86VectorTargetTransformInfo VTTI;
public:
X86_64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,

View File

@ -0,0 +1,33 @@
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; Exercises the cost model on insertelement/extractelement instructions.
; Each instruction is preceded by the cost the analysis output is expected
; to report for it. The expectations encode one rule: a floating point
; element at constant index 0 costs 0, every other access costs 1.
define i32 @insert-extract-at-zero-idx(i32 %arg, float %fl) {
; Float element at constant index 0: expected to be free.
;CHECK: cost of 0 {{.*}} extract
%A = extractelement <4 x float> undef, i32 0
; Integer element at index 0: the free-access rule is not expected to apply.
;CHECK: cost of 1 {{.*}} extract
%B = extractelement <4 x i32> undef, i32 0
; Float element at a non-zero index: expected cost 1.
;CHECK: cost of 1 {{.*}} extract
%C = extractelement <4 x float> undef, i32 1
; Same index-0 rule on an 8-element float vector.
;CHECK: cost of 0 {{.*}} extract
%D = extractelement <8 x float> undef, i32 0
; Non-zero index on the 8-element float vector: expected cost 1.
;CHECK: cost of 1 {{.*}} extract
%E = extractelement <8 x float> undef, i32 1
; Index is the runtime value %arg rather than a constant: expected cost 1.
;CHECK: cost of 1 {{.*}} extract
%F = extractelement <8 x float> undef, i32 %arg
; Inserting a float at constant index 0: expected to be free.
;CHECK: cost of 0 {{.*}} insert
%G = insertelement <4 x float> undef, float %fl, i32 0
; Inserting a float at a non-zero index: expected cost 1.
;CHECK: cost of 1 {{.*}} insert
%H = insertelement <4 x float> undef, float %fl, i32 1
; Inserting an integer at index 0: expected cost 1.
;CHECK: cost of 1 {{.*}} insert
%I = insertelement <4 x i32> undef, i32 %arg, i32 0
; Inserting a double at constant index 0: expected to be free, like float.
;CHECK: cost of 0 {{.*}} insert
%J = insertelement <4 x double> undef, double undef, i32 0
ret i32 0
}