diff --git a/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 8b418a08cbeb..1c5e739ef841 100644
--- a/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -322,9 +322,6 @@ namespace {
     /// target-specific node if it hasn't already been changed.
     SDNode *Select(SDValue Op);
 
-    //! Emit the instruction sequence for i128 sext
-    SDNode *SelectSEXTi128(SDValue &Op, EVT OpVT);
-
     //! Emit the instruction sequence for i64 shl
     SDNode *SelectSHLi64(SDValue &Op, EVT OpVT);
 
@@ -836,10 +833,6 @@ SPUDAGToDAGISel::Select(SDValue Op) {
         }
       }
     }
-  } else if (Opc == ISD::SIGN_EXTEND) {
-    if (OpVT == MVT::i128) {
-      return SelectSEXTi128(Op, OpVT);
-    }
   } else if (Opc == ISD::SHL) {
     if (OpVT == MVT::i64) {
       return SelectSHLi64(Op, OpVT);
@@ -963,58 +956,6 @@ SPUDAGToDAGISel::Select(SDValue Op) {
     return SelectCode(Op);
 }
 
-/*!
- * Emit the instruction sequence for i64 -> i128 sign extend. The basic
- * algorithm is to duplicate the sign bit using rotmai to generate at
- * least one byte full of sign bits. Then propagate the "sign-byte" into
- * theleftmost words and the i64 into the rightmost words using shufb.
- *
- * @param Op The sext operand
- * @param OpVT The type to extend to
- * @return The SDNode with the entire instruction sequence
- */
-SDNode *
-SPUDAGToDAGISel::SelectSEXTi128(SDValue &Op, EVT OpVT)
-{
-  DebugLoc dl = Op.getDebugLoc();
-
-  // Type to extend from
-  SDValue Op0 = Op.getOperand(0);
-  EVT Op0VT = Op0.getValueType();
-
-  assert((OpVT == MVT::i128 && Op0VT == MVT::i64) &&
-         "LowerSIGN_EXTEND: input and/or output operand have wrong size");
-
-  // Create shuffle mask
-  unsigned mask1 = 0x10101010; // byte  0 -  3 and 4 - 7
-  unsigned mask2 = 0x01020304; // byte  8 - 11
-  unsigned mask3 = 0x05060708; // byte 12 - 15
-  SDValue shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
-                             CurDAG->getConstant(mask1, MVT::i32),
-                             CurDAG->getConstant(mask1, MVT::i32),
-                             CurDAG->getConstant(mask2, MVT::i32),
-                             CurDAG->getConstant(mask3, MVT::i32));
-  SDNode *shufMaskLoad = emitBuildVector(shufMask);
-
-  // Word wise arithmetic right shift to generate at least one byte
-  // that contains sign bits.
-  SDNode *PromoteScalar = SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl,
-                                                     MVT::v2i64, Op0, Op0));
-  SDNode *sraVal = SelectCode(CurDAG->getNode(ISD::SRA, dl, MVT::v2i64,
-                                         SDValue(PromoteScalar, 0),
-                                         CurDAG->getConstant(31, MVT::i32)));
-
-  // Shuffle bytes - Copy the sign bits into the upper 64 bits
-  // and the input value into the lower 64 bits.
-  SDNode *extShuffle = SelectCode(CurDAG->getNode(SPUISD::SHUFB, dl,
-                                                  MVT::v2i64, Op0,
-                                                  SDValue(sraVal, 0),
-                                                  SDValue(shufMaskLoad, 0)));
-
-  return SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, dl, MVT::i128,
-                                    SDValue(extShuffle, 0)));
-}
-
 /*!
  * Emit the instruction sequence for i64 left shifts. The basic algorithm
  * is to fill the bottom two word slots with zeros so that zeros are shifted
diff --git a/llvm/test/CodeGen/CellSPU/loads.ll b/llvm/test/CodeGen/CellSPU/loads.ll
index 3b9746c8080a..4addbab87a92 100644
--- a/llvm/test/CodeGen/CellSPU/loads.ll
+++ b/llvm/test/CodeGen/CellSPU/loads.ll
@@ -1,6 +1,4 @@
-; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: grep {lqd.*0(\$3)}   %t1.s | count 1
-; RUN: grep {lqd.*16(\$3)}  %t1.s | count 1
+; RUN: llvm-as -o - %s | llc -march=cellspu | FileCheck %s
 
 ; ModuleID = 'loads.bc'
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
@@ -10,11 +8,16 @@ define <4 x float> @load_v4f32_1(<4 x float>* %a) nounwind readonly {
 entry:
 	%tmp1 = load <4 x float>* %a
 	ret <4 x float> %tmp1
+; Match the function label first so each lqd check is tied to its own function.
+; CHECK:	load_v4f32_1:
+; CHECK:	lqd	$3, 0($3)
 }
 
 define <4 x float> @load_v4f32_2(<4 x float>* %a) nounwind readonly {
 entry:
-	%arrayidx = getelementptr <4 x float>* %a, i32 1		; <<4 x float>*> [#uses=1]
-	%tmp1 = load <4 x float>* %arrayidx		; <<4 x float>> [#uses=1]
+	%arrayidx = getelementptr <4 x float>* %a, i32 1
+	%tmp1 = load <4 x float>* %arrayidx
 	ret <4 x float> %tmp1
+; CHECK:	load_v4f32_2:
+; CHECK:	lqd	$3, 16($3)
 }