LoadStoreVectorizer: if one element of a vector is integer, default to integer.

Fixes issues on some architectures where we use arithmetic ops to build vectors, which can cause bad things to happen for loads/stores of mixed types. Patch by Fiona Glaser. llvm-svn: 274307
2016-07-01 00:37:01 +00:00 · 2016-07-01 00:37:01 +00:00 · d7e8898bdd
parent 8a4ab5e19f
commit d7e8898bdd
2 changed files with 19 additions and 3 deletions
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@ -579,7 +579,15 @@ bool Vectorizer::vectorizeInstructions(ArrayRef<Value *> Instrs) {

 bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
  StoreInst *S0 = cast<StoreInst>(Chain[0]);
-  Type *StoreTy = S0->getValueOperand()->getType();
+
+  // If the vector has an int element, default to int for the whole store.
+  Type *StoreTy;
+  for (const auto &V : Chain) {
+    StoreTy = cast<StoreInst>(V)->getValueOperand()->getType();
+    if (StoreTy->isIntOrIntVectorTy())
+      break;
+  }
+
  unsigned Sz = DL.getTypeSizeInBits(StoreTy);
  unsigned VF = VecRegSize / Sz;
  unsigned ChainSize = Chain.size();
@ -700,7 +708,15 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {

 bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
  LoadInst *L0 = cast<LoadInst>(Chain[0]);
-  Type *LoadTy = L0->getType();
+
+  // If the vector has an int element, default to int for the whole load.
+  Type *LoadTy;
+  for (const auto &V : Chain) {
+    LoadTy = cast<LoadInst>(V)->getType();
+    if (LoadTy->isIntOrIntVectorTy())
+      break;
+  }
+
  unsigned Sz = DL.getTypeSizeInBits(LoadTy);
  unsigned VF = VecRegSize / Sz;
  unsigned ChainSize = Chain.size();
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
@ -140,7 +140,7 @@ define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 {
 }

 ; CHECK-LABEL: @merge_global_store_4_constants_mixed_i32_f32
-; CHECK: store <4 x float> <float 8.000000e+00, float 0x36D6000000000000, float 2.000000e+00, float 0x36E1000000000000>, <4 x float> addrspace(1)* %{{[0-9]+}}
+; CHECK: store <4 x i32> <i32 1090519040, i32 11, i32 1073741824, i32 17>, <4 x i32> addrspace(1)* %{{[0-9]+$}}
 define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 {
  %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
  %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2