From be79a7ac7abd3f7d346876b0c413579264408f00 Mon Sep 17 00:00:00 2001
From: Nadav Rotem <nrotem@apple.com>
Date: Mon, 1 Apr 2013 15:53:30 +0000
Subject: [PATCH] Add support for vector data types in the LLVM interpreter.

Patch by:
Veselov, Yuri <Yuri.Veselov@intel.com>

llvm-svn: 178469
---
 .../llvm/ExecutionEngine/GenericValue.h       |  11 +-
 llvm/lib/ExecutionEngine/ExecutionEngine.cpp  | 151 +++++++++++++++++-
 .../ExecutionEngine/Interpreter/Execution.cpp |  33 ++++
 .../ExecutionEngine/Interpreter/Interpreter.h |   1 +
 .../test-interp-vec-loadstore.ll              |  84 ++++++++++
 5 files changed, 274 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/ExecutionEngine/test-interp-vec-loadstore.ll
diff --git a/llvm/include/llvm/ExecutionEngine/GenericValue.h b/llvm/include/llvm/ExecutionEngine/GenericValue.h
index 21b99421b32d..0e92f79eba8f 100644
--- a/llvm/include/llvm/ExecutionEngine/GenericValue.h
+++ b/llvm/include/llvm/ExecutionEngine/GenericValue.h
@@ -35,14 +35,19 @@ struct GenericValue {
     struct IntPair  UIntPairVal;
     unsigned char   Untyped[8];
   };
-  APInt IntVal;   // also used for long doubles
+  APInt IntVal;   // also used for long doubles.
+  // For aggregate data types.
+  std::vector<GenericValue> AggregateVal;
 
-  GenericValue() : DoubleVal(0.0), IntVal(1,0) {}
+  // Zeroing the GenericValue here could be omitted to make the code faster,
+  // but that could potentially cause problems, since the GenericValue would
+  // then store garbage instead of zero.
+  GenericValue() : IntVal(1,0) {UIntPairVal.first = 0; UIntPairVal.second = 0;}
   explicit GenericValue(void *V) : PointerVal(V), IntVal(1,0) { }
 };
 
 inline GenericValue PTOGV(void *P) { return GenericValue(P); }
 inline void* GVTOP(const GenericValue &GV) { return GV.PointerVal; }
 
-} // End llvm namespace
+} // End llvm namespace.
 #endif
diff --git a/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
index 3d59d251a085..906a3a3fda7f 100644
--- a/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -535,6 +535,8 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
   if (isa<UndefValue>(C)) {
     GenericValue Result;
     switch (C->getType()->getTypeID()) {
+    default:
+      break;
     case Type::IntegerTyID:
     case Type::X86_FP80TyID:
     case Type::FP128TyID:
@@ -543,7 +545,16 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
       // with the correct bit width.
       Result.IntVal = APInt(C->getType()->getPrimitiveSizeInBits(), 0);
       break;
-    default:
+    case Type::VectorTyID:
+      // if the whole vector is 'undef' just reserve memory for the value.
+      const VectorType* VTy = dyn_cast<VectorType>(C->getType());
+      const Type *ElemTy = VTy->getElementType();
+      unsigned int elemNum = VTy->getNumElements();
+      Result.AggregateVal.resize(elemNum);
+      if (ElemTy->isIntegerTy())
+        for (unsigned int i = 0; i < elemNum; ++i)
+          Result.AggregateVal[i].IntVal = 
+            APInt(ElemTy->getPrimitiveSizeInBits(), 0);
       break;
     }
     return Result;
@@ -825,6 +836,101 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
     else
       llvm_unreachable("Unknown constant pointer type!");
     break;
+  case Type::VectorTyID: {
+    unsigned elemNum;
+    Type* ElemTy;
+    const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C);
+    const ConstantVector *CV = dyn_cast<ConstantVector>(C);
+    const ConstantAggregateZero *CAZ = dyn_cast<ConstantAggregateZero>(C);
+
+    if (CDV) {
+        elemNum = CDV->getNumElements();
+        ElemTy = CDV->getElementType();
+    } else if (CV || CAZ) {
+        VectorType* VTy = dyn_cast<VectorType>(C->getType());
+        elemNum = VTy->getNumElements();
+        ElemTy = VTy->getElementType();
+    } else {
+        llvm_unreachable("Unknown constant vector type!");
+    }
+
+    Result.AggregateVal.resize(elemNum);
+    // Check if vector holds floats.
+    if(ElemTy->isFloatTy()) {
+      if (CAZ) {
+        GenericValue floatZero;
+        floatZero.FloatVal = 0.f;
+        std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(),
+                  floatZero);
+        break;
+      }
+      if(CV) {
+        for (unsigned i = 0; i < elemNum; ++i)
+          if (!isa<UndefValue>(CV->getOperand(i)))
+            Result.AggregateVal[i].FloatVal = cast<ConstantFP>(
+              CV->getOperand(i))->getValueAPF().convertToFloat();
+        break;
+      }
+      if(CDV)
+        for (unsigned i = 0; i < elemNum; ++i)
+          Result.AggregateVal[i].FloatVal = CDV->getElementAsFloat(i);
+
+      break;
+    }
+    // Check if vector holds doubles.
+    if (ElemTy->isDoubleTy()) {
+      if (CAZ) {
+        GenericValue doubleZero;
+        doubleZero.DoubleVal = 0.0;
+        std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(),
+                  doubleZero);
+        break;
+      }
+      if(CV) {
+        for (unsigned i = 0; i < elemNum; ++i)
+          if (!isa<UndefValue>(CV->getOperand(i)))
+            Result.AggregateVal[i].DoubleVal = cast<ConstantFP>(
+              CV->getOperand(i))->getValueAPF().convertToDouble();
+        break;
+      }
+      if(CDV)
+        for (unsigned i = 0; i < elemNum; ++i)
+          Result.AggregateVal[i].DoubleVal = CDV->getElementAsDouble(i);
+
+      break;
+    }
+    // Check if vector holds integers.
+    if (ElemTy->isIntegerTy()) {
+      if (CAZ) {
+        GenericValue intZero;     
+        intZero.IntVal = APInt(ElemTy->getScalarSizeInBits(), 0ull);
+        std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(),
+                  intZero);
+        break;
+      }
+      if(CV) {
+        for (unsigned i = 0; i < elemNum; ++i)
+          if (!isa<UndefValue>(CV->getOperand(i)))
+            Result.AggregateVal[i].IntVal = cast<ConstantInt>(
+                                            CV->getOperand(i))->getValue();
+          else {
+            Result.AggregateVal[i].IntVal =
+              APInt(CV->getOperand(i)->getType()->getPrimitiveSizeInBits(), 0);
+          }
+        break;
+      }
+      if(CDV)
+        for (unsigned i = 0; i < elemNum; ++i)
+          Result.AggregateVal[i].IntVal = APInt(
+            CDV->getElementType()->getPrimitiveSizeInBits(),
+            CDV->getElementAsInteger(i));
+
+      break;
+    }
+    llvm_unreachable("Unknown constant pointer type!");
+  }
+  break;
+
   default:
     SmallString<256> Msg;
     raw_svector_ostream OS(Msg);
@@ -866,6 +972,9 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
   const unsigned StoreBytes = getDataLayout()->getTypeStoreSize(Ty);
 
   switch (Ty->getTypeID()) {
+  default:
+    dbgs() << "Cannot store value of type " << *Ty << "!\n";
+    break;
   case Type::IntegerTyID:
     StoreIntToMemory(Val.IntVal, (uint8_t*)Ptr, StoreBytes);
     break;
@@ -885,8 +994,19 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
 
     *((PointerTy*)Ptr) = Val.PointerVal;
     break;
-  default:
-    dbgs() << "Cannot store value of type " << *Ty << "!\n";
+  case Type::VectorTyID:
+    for (unsigned i = 0; i < Val.AggregateVal.size(); ++i) {
+      if (cast<VectorType>(Ty)->getElementType()->isDoubleTy())
+        *(((double*)Ptr)+i) = Val.AggregateVal[i].DoubleVal;
+      if (cast<VectorType>(Ty)->getElementType()->isFloatTy())
+        *(((float*)Ptr)+i) = Val.AggregateVal[i].FloatVal;
+      if (cast<VectorType>(Ty)->getElementType()->isIntegerTy()) {
+        unsigned numOfBytes =(Val.AggregateVal[i].IntVal.getBitWidth()+7)/8;
+        StoreIntToMemory(Val.AggregateVal[i].IntVal, 
+          (uint8_t*)Ptr + numOfBytes*i, numOfBytes);
+      }
+    }
+    break;
   }
 
   if (sys::isLittleEndianHost() != getDataLayout()->isLittleEndian())
@@ -951,6 +1071,31 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
     Result.IntVal = APInt(80, y);
     break;
   }
+  case Type::VectorTyID: {
+    const VectorType *VT = cast<VectorType>(Ty);
+    const Type *ElemT = VT->getElementType();
+    const unsigned numElems = VT->getNumElements();
+    if (ElemT->isFloatTy()) {
+      Result.AggregateVal.resize(numElems);
+      for (unsigned i = 0; i < numElems; ++i)
+        Result.AggregateVal[i].FloatVal = *((float*)Ptr+i);
+    }
+    if (ElemT->isDoubleTy()) {
+      Result.AggregateVal.resize(numElems);
+      for (unsigned i = 0; i < numElems; ++i)
+        Result.AggregateVal[i].DoubleVal = *((double*)Ptr+i);
+    }
+    if (ElemT->isIntegerTy()) {
+      GenericValue intZero;
+      const unsigned elemBitWidth = cast<IntegerType>(ElemT)->getBitWidth();
+      intZero.IntVal = APInt(elemBitWidth, 0);
+      Result.AggregateVal.resize(numElems, intZero);
+      for (unsigned i = 0; i < numElems; ++i)
+        LoadIntFromMemory(Result.AggregateVal[i].IntVal,
+          (uint8_t*)Ptr+((elemBitWidth+7)/8)*i, (elemBitWidth+7)/8);
+    }
+  break;
+  }
   default:
     SmallString<256> Msg;
     raw_svector_ostream OS(Msg);
diff --git a/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
index ec4f7f681364..526c04e082d2 100644
--- a/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -1187,6 +1187,39 @@ void Interpreter::visitVAArgInst(VAArgInst &I) {
   ++VAList.UIntPairVal.second;
 }
 
+void Interpreter::visitExtractElementInst(ExtractElementInst &I) { // Interprets one extractelement instruction.
+  ExecutionContext &SF = ECStack.back(); // Context at the back of ECStack.
+  GenericValue Src1 = getOperandValue(I.getOperand(0), SF); // Operand 0: its AggregateVal is indexed below.
+  GenericValue Src2 = getOperandValue(I.getOperand(1), SF); // Operand 1: its IntVal supplies the index.
+  GenericValue Dest; // Stays default-constructed unless a case below fills it.
+
+  Type *Ty = I.getType(); // Type of the instruction; picks which field is copied.
+  const unsigned indx = unsigned(Src2.IntVal.getZExtValue()); // Index read zero-extended, then narrowed to unsigned.
+
+  if(Src1.AggregateVal.size() > indx) { // Only read the element when indx is in range.
+    switch (Ty->getTypeID()) {
+    default: // Anything other than integer, float or double is not handled.
+      dbgs() << "Unhandled destination type for extractelement instruction: "
+      << *Ty << "\n";
+      llvm_unreachable(0);
+      break;
+    case Type::IntegerTyID: // Integer result: copy the element's IntVal.
+      Dest.IntVal = Src1.AggregateVal[indx].IntVal;
+      break;
+    case Type::FloatTyID: // Float result: copy the element's FloatVal.
+      Dest.FloatVal = Src1.AggregateVal[indx].FloatVal;
+      break;
+    case Type::DoubleTyID: // Double result: copy the element's DoubleVal.
+      Dest.DoubleVal = Src1.AggregateVal[indx].DoubleVal;
+      break;
+    }
+  } else { // indx >= element count: only a debug message, Dest is left as constructed.
+    dbgs() << "Invalid index in extractelement instruction\n";
+  }
+
+  SetValue(&I, Dest, SF); // NOTE(review): presumably records Dest as I's result in SF -- confirm against SetValue.
+}
+
 GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
                                                 ExecutionContext &SF) {
   switch (CE->getOpcode()) {
diff --git a/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h b/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
index e95db2fc4eba..2952d7eabe2b 100644
--- a/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -178,6 +178,7 @@ public:
   void visitAShr(BinaryOperator &I);
 
   void visitVAArgInst(VAArgInst &I);
+  void visitExtractElementInst(ExtractElementInst &I);
   void visitInstruction(Instruction &I) {
     errs() << I << "\n";
     llvm_unreachable("Instruction not interpretable yet!");
diff --git a/llvm/test/ExecutionEngine/test-interp-vec-loadstore.ll b/llvm/test/ExecutionEngine/test-interp-vec-loadstore.ll
new file mode 100644
index 000000000000..e9f5b445a864
--- /dev/null
+++ b/llvm/test/ExecutionEngine/test-interp-vec-loadstore.ll
@@ -0,0 +1,84 @@
+; RUN: %lli -force-interpreter=true %s | FileCheck %s
+; CHECK: 1
+; CHECK: 2
+; CHECK: 3
+; CHECK: 4
+; CHECK: 5.{{[0]+}}e+{{[0]+}}
+; CHECK: 6.{{[0]+}}e+{{[0]+}}
+; CHECK: 7.{{[0]+}}e+{{[0]+}}
+; CHECK: 8.{{[0]+}}e+{{[0]+}}
+; CHECK: 9.{{[0]+}}e+{{[0]+}}
+; CHECK: 1.{{[0]+}}e+{{[0]+}}1
+; CHECK: 1.1{{[0]+}}e+{{[0]+}}1
+; CHECK: 1.2{{[0]+}}e+{{[0]+}}1
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+
+@format_i32 = internal global [4 x i8] c"%d\0A\00"
+@format_float = internal global [4 x i8] c"%e\0A\00"
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main() {
+  %a = alloca <4 x i32>, align 16
+  %b = alloca <4 x double>, align 16
+  %c = alloca <4 x float>, align 16
+  
+  store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %a, align 16
+
+  %val0 = load <4 x i32> *%a, align 16
+
+  %res_i32_0 = extractelement <4 x i32> %val0, i32 0
+  %res_i32_1 = extractelement <4 x i32> %val0, i32 1
+  %res_i32_2 = extractelement <4 x i32> %val0, i32 2
+  %res_i32_3 = extractelement <4 x i32> %val0, i32 3
+  
+  %ptr0 = getelementptr [4 x i8]* @format_i32, i32 0, i32 0
+  call i32 (i8*,...)* @printf(i8* %ptr0, i32 %res_i32_0)
+  call i32 (i8*,...)* @printf(i8* %ptr0, i32 %res_i32_1)
+  call i32 (i8*,...)* @printf(i8* %ptr0, i32 %res_i32_2)
+  call i32 (i8*,...)* @printf(i8* %ptr0, i32 %res_i32_3)
+
+  store <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, <4 x double>* %b, align 16
+
+  %val1 = load <4 x double> *%b, align 16
+
+  %res_double_0 = extractelement <4 x double> %val1, i32 0
+  %res_double_1 = extractelement <4 x double> %val1, i32 1
+  %res_double_2 = extractelement <4 x double> %val1, i32 2
+  %res_double_3 = extractelement <4 x double> %val1, i32 3
+  
+  %ptr1 = getelementptr [4 x i8]* @format_float, i32 0, i32 0
+  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_double_0)
+  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_double_1)
+  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_double_2)
+  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_double_3)
+
+
+  store <4 x float> <float 9.0, float 10.0, float 11.0, float 12.0>, <4 x float>* %c, align 16
+  
+  %val2 = load <4 x float> *%c, align 16
+  
+  %ptr2 = getelementptr [4 x i8]* @format_float, i32 0, i32 0
+
+  ; For some reason printf doesn't print floats correctly, so the
+  ; floats are cast to doubles and printed as doubles.
+  
+  %res_serv_0 = extractelement <4 x float> %val2, i32 0
+  %res_float_0 = fpext float %res_serv_0 to double
+  %res_serv_1 = extractelement <4 x float> %val2, i32 1
+  %res_float_1 = fpext float %res_serv_1 to double
+  %res_serv_2 = extractelement <4 x float> %val2, i32 2
+  %res_float_2 = fpext float %res_serv_2 to double
+  %res_serv_3 = extractelement <4 x float> %val2, i32 3
+  %res_float_3 = fpext float %res_serv_3 to double
+
+ 
+  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_float_0)
+  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_float_1)
+  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_float_2)
+  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_float_3)
+ 
+  
+  ret i32 0
+}