Remove llvm-upgrade and update tests.

llvm-svn: 47325
This commit is contained in:
Tanya Lattner 2008-02-19 08:07:33 +00:00
parent a00c808d40
commit a99d8b5a9a
51 changed files with 1303 additions and 1307 deletions

View File

@ -1,54 +1,52 @@
; All of these routines should be perform optimal load of constants.
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
; RUN: llvm-as < %s | llc -march=ppc32 | \
; RUN: grep lis | count 5
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
; RUN: llvm-as < %s | llc -march=ppc32 | \
; RUN: grep ori | count 3
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
; RUN: llvm-as < %s | llc -march=ppc32 | \
; RUN: grep {li } | count 4
implementation ; Functions:
int %f1() {
define i32 @f1() {
entry:
ret int 1
ret i32 1
}
int %f2() {
define i32 @f2() {
entry:
ret int -1
ret i32 -1
}
int %f3() {
define i32 @f3() {
entry:
ret int 0
ret i32 0
}
int %f4() {
define i32 @f4() {
entry:
ret int 32767
ret i32 32767
}
int %f5() {
define i32 @f5() {
entry:
ret int 65535
ret i32 65535
}
int %f6() {
define i32 @f6() {
entry:
ret int 65536
ret i32 65536
}
int %f7() {
define i32 @f7() {
entry:
ret int 131071
ret i32 131071
}
int %f8() {
define i32 @f8() {
entry:
ret int 2147483647
ret i32 2147483647
}
int %f9() {
define i32 @f9() {
entry:
ret int -2147483648
ret i32 -2147483648
}

View File

@ -1,12 +1,11 @@
; Make sure this testcase does not use ctpop
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep -i cntlzw
; RUN: llvm-as < %s | llc -march=ppc32 | grep -i cntlzw
declare uint %llvm.cttz.i32(uint)
declare i32 @llvm.cttz.i32(i32)
implementation ; Functions:
uint %bar(uint %x) {
define i32 @bar(i32 %x) {
entry:
%tmp.1 = call uint %llvm.cttz.i32( uint %x )
ret uint %tmp.1
%tmp.1 = call i32 @llvm.cttz.i32( i32 %x ) ; <i32> [#uses=1]
ret i32 %tmp.1
}

View File

@ -1,8 +1,6 @@
; RUN: llvm-upgrade < %s | llvm-as | llc | grep {foo bar":}
; RUN: llvm-as < %s | llc | grep {foo bar":}
target endian = big
target pointersize = 32
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin8.2.0"
"foo bar" = global int 4
@"foo bar" = global i32 4 ; <i32*> [#uses=0]

View File

@ -1,29 +1,30 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep srawi
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep blr
; RUN: llvm-as < %s | llc -march=ppc32 | not grep srawi
; RUN: llvm-as < %s | llc -march=ppc32 | grep blr
int %test1(int %X) {
%Y = and int %X, 15
%Z = div int %Y, 4
ret int %Z
define i32 @test1(i32 %X) {
%Y = and i32 %X, 15 ; <i32> [#uses=1]
%Z = sdiv i32 %Y, 4 ; <i32> [#uses=1]
ret i32 %Z
}
int %test2(int %W) {
%X = and int %W, 15
%Y = sub int 16, %X
%Z = div int %Y, 4
ret int %Z
define i32 @test2(i32 %W) {
%X = and i32 %W, 15 ; <i32> [#uses=1]
%Y = sub i32 16, %X ; <i32> [#uses=1]
%Z = sdiv i32 %Y, 4 ; <i32> [#uses=1]
ret i32 %Z
}
int %test3(int %W) {
%X = and int %W, 15
%Y = sub int 15, %X
%Z = div int %Y, 4
ret int %Z
define i32 @test3(i32 %W) {
%X = and i32 %W, 15 ; <i32> [#uses=1]
%Y = sub i32 15, %X ; <i32> [#uses=1]
%Z = sdiv i32 %Y, 4 ; <i32> [#uses=1]
ret i32 %Z
}
int %test4(int %W) {
%X = and int %W, 2
%Y = sub int 5, %X
%Z = div int %Y, 2
ret int %Z
define i32 @test4(i32 %W) {
%X = and i32 %W, 2 ; <i32> [#uses=1]
%Y = sub i32 5, %X ; <i32> [#uses=1]
%Z = sdiv i32 %Y, 2 ; <i32> [#uses=1]
ret i32 %Z
}

View File

@ -1,94 +1,93 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
; RUN: llvm-as < %s | llc -march=ppc32 | \
; RUN: grep eqv | count 3
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | \
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | \
; RUN: grep andc | count 3
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
; RUN: llvm-as < %s | llc -march=ppc32 | \
; RUN: grep orc | count 2
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | \
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | \
; RUN: grep nor | count 3
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
; RUN: llvm-as < %s | llc -march=ppc32 | \
; RUN: grep nand | count 1
int %EQV1(int %X, int %Y) {
%A = xor int %X, %Y
%B = xor int %A, -1
ret int %B
define i32 @EQV1(i32 %X, i32 %Y) {
%A = xor i32 %X, %Y ; <i32> [#uses=1]
%B = xor i32 %A, -1 ; <i32> [#uses=1]
ret i32 %B
}
int %EQV2(int %X, int %Y) {
%A = xor int %X, -1
%B = xor int %A, %Y
ret int %B
define i32 @EQV2(i32 %X, i32 %Y) {
%A = xor i32 %X, -1 ; <i32> [#uses=1]
%B = xor i32 %A, %Y ; <i32> [#uses=1]
ret i32 %B
}
int %EQV3(int %X, int %Y) {
%A = xor int %X, -1
%B = xor int %Y, %A
ret int %B
define i32 @EQV3(i32 %X, i32 %Y) {
%A = xor i32 %X, -1 ; <i32> [#uses=1]
%B = xor i32 %Y, %A ; <i32> [#uses=1]
ret i32 %B
}
int %ANDC1(int %X, int %Y) {
%A = xor int %Y, -1
%B = and int %X, %A
ret int %B
define i32 @ANDC1(i32 %X, i32 %Y) {
%A = xor i32 %Y, -1 ; <i32> [#uses=1]
%B = and i32 %X, %A ; <i32> [#uses=1]
ret i32 %B
}
int %ANDC2(int %X, int %Y) {
%A = xor int %X, -1
%B = and int %A, %Y
ret int %B
define i32 @ANDC2(i32 %X, i32 %Y) {
%A = xor i32 %X, -1 ; <i32> [#uses=1]
%B = and i32 %A, %Y ; <i32> [#uses=1]
ret i32 %B
}
int %ORC1(int %X, int %Y) {
%A = xor int %Y, -1
%B = or int %X, %A
ret int %B
define i32 @ORC1(i32 %X, i32 %Y) {
%A = xor i32 %Y, -1 ; <i32> [#uses=1]
%B = or i32 %X, %A ; <i32> [#uses=1]
ret i32 %B
}
int %ORC2(int %X, int %Y) {
%A = xor int %X, -1
%B = or int %A, %Y
ret int %B
define i32 @ORC2(i32 %X, i32 %Y) {
%A = xor i32 %X, -1 ; <i32> [#uses=1]
%B = or i32 %A, %Y ; <i32> [#uses=1]
ret i32 %B
}
int %NOR1(int %X) {
%Y = xor int %X, -1
ret int %Y
define i32 @NOR1(i32 %X) {
%Y = xor i32 %X, -1 ; <i32> [#uses=1]
ret i32 %Y
}
int %NOR2(int %X, int %Y) {
%Z = or int %X, %Y
%R = xor int %Z, -1
ret int %R
define i32 @NOR2(i32 %X, i32 %Y) {
%Z = or i32 %X, %Y ; <i32> [#uses=1]
%R = xor i32 %Z, -1 ; <i32> [#uses=1]
ret i32 %R
}
int %NAND1(int %X, int %Y) {
%Z = and int %X, %Y
%W = xor int %Z, -1
ret int %W
define i32 @NAND1(i32 %X, i32 %Y) {
%Z = and i32 %X, %Y ; <i32> [#uses=1]
%W = xor i32 %Z, -1 ; <i32> [#uses=1]
ret i32 %W
}
void %VNOR(<4 x float>* %P, <4 x float>* %Q) {
%tmp = load <4 x float>* %P
%tmp = cast <4 x float> %tmp to <4 x int>
%tmp2 = load <4 x float>* %Q
%tmp2 = cast <4 x float> %tmp2 to <4 x int>
%tmp3 = or <4 x int> %tmp, %tmp2
%tmp4 = xor <4 x int> %tmp3, < int -1, int -1, int -1, int -1 >
%tmp4 = cast <4 x int> %tmp4 to <4 x float>
store <4 x float> %tmp4, <4 x float>* %P
ret void
define void @VNOR(<4 x float>* %P, <4 x float>* %Q) {
%tmp = load <4 x float>* %P ; <<4 x float>> [#uses=1]
%tmp.upgrd.1 = bitcast <4 x float> %tmp to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp2 = load <4 x float>* %Q ; <<4 x float>> [#uses=1]
%tmp2.upgrd.2 = bitcast <4 x float> %tmp2 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp3 = or <4 x i32> %tmp.upgrd.1, %tmp2.upgrd.2 ; <<4 x i32>> [#uses=1]
%tmp4 = xor <4 x i32> %tmp3, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
%tmp4.upgrd.3 = bitcast <4 x i32> %tmp4 to <4 x float> ; <<4 x float>> [#uses=1]
store <4 x float> %tmp4.upgrd.3, <4 x float>* %P
ret void
}
void %VANDC(<4 x float>* %P, <4 x float>* %Q) {
%tmp = load <4 x float>* %P
%tmp = cast <4 x float> %tmp to <4 x int>
%tmp2 = load <4 x float>* %Q
%tmp2 = cast <4 x float> %tmp2 to <4 x int>
%tmp4 = xor <4 x int> %tmp2, < int -1, int -1, int -1, int -1 >
%tmp3 = and <4 x int> %tmp, %tmp4
%tmp4 = cast <4 x int> %tmp3 to <4 x float>
store <4 x float> %tmp4, <4 x float>* %P
ret void
define void @VANDC(<4 x float>* %P, <4 x float>* %Q) {
%tmp = load <4 x float>* %P ; <<4 x float>> [#uses=1]
%tmp.upgrd.4 = bitcast <4 x float> %tmp to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp2 = load <4 x float>* %Q ; <<4 x float>> [#uses=1]
%tmp2.upgrd.5 = bitcast <4 x float> %tmp2 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp4 = xor <4 x i32> %tmp2.upgrd.5, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
%tmp3 = and <4 x i32> %tmp.upgrd.4, %tmp4 ; <<4 x i32>> [#uses=1]
%tmp4.upgrd.6 = bitcast <4 x i32> %tmp3 to <4 x float> ; <<4 x float>> [#uses=1]
store <4 x float> %tmp4.upgrd.6, <4 x float>* %P
ret void
}

View File

@ -1,7 +1,8 @@
; This should turn into a single extsh
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep extsh | count 1
int %test(int %X) {
%tmp.81 = shl int %X, ubyte 16 ; <int> [#uses=1]
%tmp.82 = shr int %tmp.81, ubyte 16 ; <int> [#uses=1]
ret int %tmp.82
; RUN: llvm-as < %s | llc -march=ppc32 | grep extsh | count 1
define i32 @test(i32 %X) {
%tmp.81 = shl i32 %X, 16 ; <i32> [#uses=1]
%tmp.82 = ashr i32 %tmp.81, 16 ; <i32> [#uses=1]
ret i32 %tmp.82
}

View File

@ -1,47 +1,54 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
; RUN: llvm-as < %s | llc -march=ppc32 | \
; RUN: egrep {fn?madd|fn?msub} | count 8
double %test_FMADD1(double %A, double %B, double %C) {
%D = mul double %A, %B
%E = add double %D, %C
define double @test_FMADD1(double %A, double %B, double %C) {
%D = mul double %A, %B ; <double> [#uses=1]
%E = add double %D, %C ; <double> [#uses=1]
ret double %E
}
double %test_FMADD2(double %A, double %B, double %C) {
%D = mul double %A, %B
%E = add double %D, %C
define double @test_FMADD2(double %A, double %B, double %C) {
%D = mul double %A, %B ; <double> [#uses=1]
%E = add double %D, %C ; <double> [#uses=1]
ret double %E
}
double %test_FMSUB(double %A, double %B, double %C) {
%D = mul double %A, %B
%E = sub double %D, %C
define double @test_FMSUB(double %A, double %B, double %C) {
%D = mul double %A, %B ; <double> [#uses=1]
%E = sub double %D, %C ; <double> [#uses=1]
ret double %E
}
double %test_FNMADD1(double %A, double %B, double %C) {
%D = mul double %A, %B
%E = add double %D, %C
%F = sub double -0.0, %E
define double @test_FNMADD1(double %A, double %B, double %C) {
%D = mul double %A, %B ; <double> [#uses=1]
%E = add double %D, %C ; <double> [#uses=1]
%F = sub double -0.000000e+00, %E ; <double> [#uses=1]
ret double %F
}
double %test_FNMADD2(double %A, double %B, double %C) {
%D = mul double %A, %B
%E = add double %C, %D
%F = sub double -0.0, %E
define double @test_FNMADD2(double %A, double %B, double %C) {
%D = mul double %A, %B ; <double> [#uses=1]
%E = add double %C, %D ; <double> [#uses=1]
%F = sub double -0.000000e+00, %E ; <double> [#uses=1]
ret double %F
}
double %test_FNMSUB1(double %A, double %B, double %C) {
%D = mul double %A, %B
%E = sub double %C, %D
define double @test_FNMSUB1(double %A, double %B, double %C) {
%D = mul double %A, %B ; <double> [#uses=1]
%E = sub double %C, %D ; <double> [#uses=1]
ret double %E
}
double %test_FNMSUB2(double %A, double %B, double %C) {
%D = mul double %A, %B
%E = sub double %D, %C
%F = sub double -0.0, %E
define double @test_FNMSUB2(double %A, double %B, double %C) {
%D = mul double %A, %B ; <double> [#uses=1]
%E = sub double %D, %C ; <double> [#uses=1]
%F = sub double -0.000000e+00, %E ; <double> [#uses=1]
ret double %F
}
float %test_FNMSUBS(float %A, float %B, float %C) {
%D = mul float %A, %B
%E = sub float %D, %C
%F = sub float -0.0, %E
define float @test_FNMSUBS(float %A, float %B, float %C) {
%D = mul float %A, %B ; <float> [#uses=1]
%E = sub float %D, %C ; <float> [#uses=1]
%F = sub float -0.000000e+00, %E ; <float> [#uses=1]
ret float %F
}

View File

@ -1,11 +1,10 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep fnabs
; RUN: llvm-as < %s | llc -march=ppc32 | grep fnabs
declare double %fabs(double)
declare double @fabs(double)
implementation
double %test(double %X) {
%Y = call double %fabs(double %X)
%Z = sub double -0.0, %Y
ret double %Z
define double @test(double %X) {
%Y = call double @fabs( double %X ) ; <double> [#uses=1]
%Z = sub double -0.000000e+00, %Y ; <double> [#uses=1]
ret double %Z
}

View File

@ -1,8 +1,9 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep fneg
; RUN: llvm-as < %s | llc -march=ppc32 | not grep fneg
double %test_FNEG_sel(double %A, double %B, double %C) {
%D = sub double -0.0, %A
%Cond = setgt double %D, -0.0
%E = select bool %Cond, double %B, double %C
ret double %E
define double @test_FNEG_sel(double %A, double %B, double %C) {
%D = sub double -0.000000e+00, %A ; <double> [#uses=1]
%Cond = fcmp ogt double %D, -0.000000e+00 ; <i1> [#uses=1]
%E = select i1 %Cond, double %B, double %C ; <double> [#uses=1]
ret double %E
}

View File

@ -1,14 +1,15 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
; RUN: llvm-as < %s | llc -march=ppc32 | \
; RUN: grep -v align | not grep li
;; Test that immediates are folded into these instructions correctly.
int %ADD(int %X) {
%Y = add int %X, 65537
ret int %Y
define i32 @ADD(i32 %X) {
%Y = add i32 %X, 65537 ; <i32> [#uses=1]
ret i32 %Y
}
int %SUB(int %X) {
%Y = sub int %X, 65537
ret int %Y
define i32 @SUB(i32 %X) {
%Y = sub i32 %X, 65537 ; <i32> [#uses=1]
ret i32 %Y
}

View File

@ -1,20 +1,21 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep fcmp | count 1
; RUN: llvm-as < %s | llc -march=ppc32 | grep fcmp | count 1
declare bool %llvm.isunordered.f64(double, double)
declare i1 @llvm.isunordered.f64(double, double)
bool %intcoord_cond_next55(double %tmp48.reload) {
define i1 @intcoord_cond_next55(double %tmp48.reload) {
newFuncRoot:
br label %cond_next55
br label %cond_next55
bb72.exitStub: ; preds = %cond_next55
ret bool true
bb72.exitStub: ; preds = %cond_next55
ret i1 true
cond_next62.exitStub: ; preds = %cond_next55
ret bool false
cond_next62.exitStub: ; preds = %cond_next55
ret i1 false
cond_next55: ; preds = %newFuncRoot
%tmp57 = setge double %tmp48.reload, 1.000000e+00 ; <bool> [#uses=1]
%tmp58 = tail call bool %llvm.isunordered.f64( double %tmp48.reload, double 1.000000e+00 ) ; <bool> [#uses=1]
%tmp59 = or bool %tmp57, %tmp58 ; <bool> [#uses=1]
br bool %tmp59, label %bb72.exitStub, label %cond_next62.exitStub
cond_next55: ; preds = %newFuncRoot
%tmp57 = fcmp oge double %tmp48.reload, 1.000000e+00 ; <i1> [#uses=1]
%tmp58 = fcmp uno double %tmp48.reload, 1.000000e+00 ; <i1> [#uses=1]
%tmp59 = or i1 %tmp57, %tmp58 ; <i1> [#uses=1]
br i1 %tmp59, label %bb72.exitStub, label %cond_next62.exitStub
}

View File

@ -1,26 +1,27 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | not grep r1
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep r1
double %test1(double %X) {
%Y = cast double %X to long
%Z = cast long %Y to double
define double @test1(double %X) {
%Y = fptosi double %X to i64 ; <i64> [#uses=1]
%Z = sitofp i64 %Y to double ; <double> [#uses=1]
ret double %Z
}
float %test2(double %X) {
%Y = cast double %X to long
%Z = cast long %Y to float
define float @test2(double %X) {
%Y = fptosi double %X to i64 ; <i64> [#uses=1]
%Z = sitofp i64 %Y to float ; <float> [#uses=1]
ret float %Z
}
double %test3(float %X) {
%Y = cast float %X to long
%Z = cast long %Y to double
define double @test3(float %X) {
%Y = fptosi float %X to i64 ; <i64> [#uses=1]
%Z = sitofp i64 %Y to double ; <double> [#uses=1]
ret double %Z
}
float %test4(float %X) {
%Y = cast float %X to long
%Z = cast long %Y to float
define float @test4(float %X) {
%Y = fptosi float %X to i64 ; <i64> [#uses=1]
%Z = sitofp i64 %Y to float ; <float> [#uses=1]
ret float %Z
}

View File

@ -1,9 +1,8 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep fctiwz | count 1
; RUN: llvm-as < %s | llc -march=ppc32 | grep fctiwz | count 1
implementation
ushort %foo(float %a) {
define i16 @foo(float %a) {
entry:
%tmp.1 = cast float %a to ushort
ret ushort %tmp.1
%tmp.1 = fptoui float %a to i16 ; <i16> [#uses=1]
ret i16 %tmp.1
}

View File

@ -1,6 +1,7 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep fmr
; RUN: llvm-as < %s | llc -march=ppc32 | not grep fmr
double %test(float %F) {
%F = cast float %F to double
ret double %F
define double @test(float %F) {
%F.upgrd.1 = fpext float %F to double ; <double> [#uses=1]
ret double %F.upgrd.1
}

View File

@ -1,21 +1,23 @@
; fsqrt should be generated when the fsqrt feature is enabled, but not
; otherwise.
; RUN: llvm-upgrade < %s | llvm-as | \
; RUN: llvm-as < %s | \
; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=+fsqrt | \
; RUN: grep {fsqrt f1, f1}
; RUN: llvm-upgrade < %s | llvm-as | \
; RUN: llvm-as < %s | \
; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
; RUN: grep {fsqrt f1, f1}
; RUN: llvm-upgrade < %s | llvm-as | \
; RUN: llvm-as < %s | \
; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-fsqrt | \
; RUN: not grep {fsqrt f1, f1}
; RUN: llvm-upgrade < %s | llvm-as | \
; RUN: llvm-as < %s | \
; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g4 | \
; RUN: not grep {fsqrt f1, f1}
declare double %llvm.sqrt.f64(double)
double %X(double %Y) {
%Z = call double %llvm.sqrt.f64(double %Y)
ret double %Z
declare double @llvm.sqrt.f64(double)
define double @X(double %Y) {
%Z = call double @llvm.sqrt.f64( double %Y ) ; <double> [#uses=1]
ret double %Z
}

View File

@ -1,25 +1,26 @@
; fcfid and fctid should be generated when the 64bit feature is enabled, but not
; otherwise.
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mattr=+64bit | \
; RUN: llvm-as < %s | llc -march=ppc32 -mattr=+64bit | \
; RUN: grep fcfid
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mattr=+64bit | \
; RUN: llvm-as < %s | llc -march=ppc32 -mattr=+64bit | \
; RUN: grep fctidz
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | \
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | \
; RUN: grep fcfid
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | \
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | \
; RUN: grep fctidz
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mattr=-64bit | \
; RUN: llvm-as < %s | llc -march=ppc32 -mattr=-64bit | \
; RUN: not grep fcfid
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mattr=-64bit | \
; RUN: llvm-as < %s | llc -march=ppc32 -mattr=-64bit | \
; RUN: not grep fctidz
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g4 | \
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g4 | \
; RUN: not grep fcfid
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g4 | \
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g4 | \
; RUN: not grep fctidz
double %X(double %Y) {
%A = cast double %Y to long
%B = cast long %A to double
ret double %B
define double @X(double %Y) {
%A = fptosi double %Y to i64 ; <i64> [#uses=1]
%B = sitofp i64 %A to double ; <double> [#uses=1]
ret double %B
}

View File

@ -1,13 +1,14 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep mr
; RUN: llvm-as < %s | llc -march=ppc32 | not grep mr
int %test(int %Y, int %X) {
define i32 @test(i32 %Y, i32 %X) {
entry:
%tmp = tail call int asm "foo $0", "=r"( ) ; <int> [#uses=1]
ret int %tmp
%tmp = tail call i32 asm "foo $0", "=r"( ) ; <i32> [#uses=1]
ret i32 %tmp
}
int %test2(int %Y, int %X) {
define i32 @test2(i32 %Y, i32 %X) {
entry:
%tmp1 = tail call int asm "foo $0, $1", "=r,r"( int %X ) ; <int> [#uses=1]
ret int %tmp1
%tmp1 = tail call i32 asm "foo $0, $1", "=r,r"( i32 %X ) ; <i32> [#uses=1]
ret i32 %tmp1
}

View File

@ -1,10 +1,13 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep xori
; RUN: llvm-as < %s | llc -march=ppc32 | not grep xori
int %test(bool %B, int* %P) {
br bool %B, label %T, label %F
T:
store int 123, int* %P
ret int 0
F:
ret int 17
define i32 @test(i1 %B, i32* %P) {
br i1 %B, label %T, label %F
T: ; preds = %0
store i32 123, i32* %P
ret i32 0
F: ; preds = %0
ret i32 17
}

View File

@ -1,7 +1,8 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep lha
; RUN: llvm-as < %s | llc -march=ppc32 | grep lha
uint %test(short* %a) {
%tmp.1 = load short* %a
%tmp.2 = cast short %tmp.1 to uint
ret uint %tmp.2
define i32 @test(i16* %a) {
%tmp.1 = load i16* %a ; <i16> [#uses=1]
%tmp.2 = sext i16 %tmp.1 to i32 ; <i32> [#uses=1]
ret i32 %tmp.2
}

View File

@ -1,9 +1,9 @@
; Should fold the ori into the lfs.
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep lfs
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep ori
; RUN: llvm-as < %s | llc -march=ppc32 | grep lfs
; RUN: llvm-as < %s | llc -march=ppc32 | not grep ori
float %test() {
%tmp.i = load float* cast (uint 186018016 to float*)
ret float %tmp.i
define float @test() {
%tmp.i = load float* inttoptr (i32 186018016 to float*) ; <float> [#uses=1]
ret float %tmp.i
}

View File

@ -1,17 +1,18 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep li.*16
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | not grep addi
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep li.*16
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep addi
; Codegen lvx (R+16) as t = li 16, lvx t,R
; This shares the 16 between the two loads.
void %func(<4 x float>* %a, <4 x float>* %b) {
%tmp1 = getelementptr <4 x float>* %b, int 1
%tmp = load <4 x float>* %tmp1
%tmp3 = getelementptr <4 x float>* %a, int 1
%tmp4 = load <4 x float>* %tmp3
%tmp5 = mul <4 x float> %tmp, %tmp4
%tmp8 = load <4 x float>* %b
%tmp9 = add <4 x float> %tmp5, %tmp8
store <4 x float> %tmp9, <4 x float>* %a
ret void
define void @func(<4 x float>* %a, <4 x float>* %b) {
%tmp1 = getelementptr <4 x float>* %b, i32 1 ; <<4 x float>*> [#uses=1]
%tmp = load <4 x float>* %tmp1 ; <<4 x float>> [#uses=1]
%tmp3 = getelementptr <4 x float>* %a, i32 1 ; <<4 x float>*> [#uses=1]
%tmp4 = load <4 x float>* %tmp3 ; <<4 x float>> [#uses=1]
%tmp5 = mul <4 x float> %tmp, %tmp4 ; <<4 x float>> [#uses=1]
%tmp8 = load <4 x float>* %b ; <<4 x float>> [#uses=1]
%tmp9 = add <4 x float> %tmp5, %tmp8 ; <<4 x float>> [#uses=1]
store <4 x float> %tmp9, <4 x float>* %a
ret void
}

View File

@ -1,68 +1,68 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -enable-ppc-preinc | \
; RUN: llvm-as < %s | llc -march=ppc32 -enable-ppc-preinc | \
; RUN: not grep addi
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 -enable-ppc-preinc | \
; RUN: llvm-as < %s | llc -march=ppc64 -enable-ppc-preinc | \
; RUN: not grep addi
%Glob = global ulong 4
int *%test0(int *%X, int *%dest) {
%Y = getelementptr int* %X, int 4
%A = load int* %Y
store int %A, int* %dest
ret int* %Y
@Glob = global i64 4 ; <i64*> [#uses=2]
define i32* @test0(i32* %X, i32* %dest) {
%Y = getelementptr i32* %X, i32 4 ; <i32*> [#uses=2]
%A = load i32* %Y ; <i32> [#uses=1]
store i32 %A, i32* %dest
ret i32* %Y
}
int *%test1(int *%X, int *%dest) {
%Y = getelementptr int* %X, int 4
%A = load int* %Y
store int %A, int* %dest
ret int* %Y
define i32* @test1(i32* %X, i32* %dest) {
%Y = getelementptr i32* %X, i32 4 ; <i32*> [#uses=2]
%A = load i32* %Y ; <i32> [#uses=1]
store i32 %A, i32* %dest
ret i32* %Y
}
short *%test2(short *%X, int *%dest) {
%Y = getelementptr short* %X, int 4
%A = load short* %Y
%B = cast short %A to int
store int %B, int* %dest
ret short* %Y
define i16* @test2(i16* %X, i32* %dest) {
%Y = getelementptr i16* %X, i32 4 ; <i16*> [#uses=2]
%A = load i16* %Y ; <i16> [#uses=1]
%B = sext i16 %A to i32 ; <i32> [#uses=1]
store i32 %B, i32* %dest
ret i16* %Y
}
ushort *%test3(ushort *%X, int *%dest) {
%Y = getelementptr ushort* %X, int 4
%A = load ushort* %Y
%B = cast ushort %A to int
store int %B, int* %dest
ret ushort* %Y
define i16* @test3(i16* %X, i32* %dest) {
%Y = getelementptr i16* %X, i32 4 ; <i16*> [#uses=2]
%A = load i16* %Y ; <i16> [#uses=1]
%B = zext i16 %A to i32 ; <i32> [#uses=1]
store i32 %B, i32* %dest
ret i16* %Y
}
short *%test3a(short *%X, long *%dest) {
%Y = getelementptr short* %X, int 4
%A = load short* %Y
%B = cast short %A to long
store long %B, long* %dest
ret short* %Y
define i16* @test3a(i16* %X, i64* %dest) {
%Y = getelementptr i16* %X, i32 4 ; <i16*> [#uses=2]
%A = load i16* %Y ; <i16> [#uses=1]
%B = sext i16 %A to i64 ; <i64> [#uses=1]
store i64 %B, i64* %dest
ret i16* %Y
}
long *%test4(long *%X, long *%dest) {
%Y = getelementptr long* %X, int 4
%A = load long* %Y
store long %A, long* %dest
ret long* %Y
define i64* @test4(i64* %X, i64* %dest) {
%Y = getelementptr i64* %X, i32 4 ; <i64*> [#uses=2]
%A = load i64* %Y ; <i64> [#uses=1]
store i64 %A, i64* %dest
ret i64* %Y
}
ushort *%test5(ushort *%X) {
%Y = getelementptr ushort* %X, int 4
store ushort 7, ushort* %Y
ret ushort* %Y
define i16* @test5(i16* %X) {
%Y = getelementptr i16* %X, i32 4 ; <i16*> [#uses=2]
store i16 7, i16* %Y
ret i16* %Y
}
ulong *%test6(ulong *%X, ulong %A) {
%Y = getelementptr ulong* %X, int 4
store ulong %A, ulong* %Y
ret ulong* %Y
define i64* @test6(i64* %X, i64 %A) {
%Y = getelementptr i64* %X, i32 4 ; <i64*> [#uses=2]
store i64 %A, i64* %Y
ret i64* %Y
}
ulong *%test7(ulong *%X, ulong %A) {
store ulong %A, ulong* %Glob
ret ulong *%Glob
define i64* @test7(i64* %X, i64 %A) {
store i64 %A, i64* @Glob
ret i64* @Glob
}

View File

@ -1,8 +1,8 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep mul
; RUN: llvm-as < %s | llc -march=ppc32 | not grep mul
int %test1(int %a) {
%tmp.1 = mul int %a, -2 ; <int> [#uses=1]
%tmp.2 = add int %tmp.1, 63 ; <int> [#uses=1]
ret int %tmp.2
define i32 @test1(i32 %a) {
%tmp.1 = mul i32 %a, -2 ; <i32> [#uses=1]
%tmp.2 = add i32 %tmp.1, 63 ; <i32> [#uses=1]
ret i32 %tmp.2
}

View File

@ -1,18 +1,17 @@
; All of these ands and shifts should be folded into rlwimi's
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -o %t -f
; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
; RUN: not grep mulhwu %t
; RUN: not grep srawi %t
; RUN: not grep add %t
; RUN: grep mulhw %t | count 1
implementation ; Functions:
int %mulhs(int %a, int %b) {
define i32 @mulhs(i32 %a, i32 %b) {
entry:
%tmp.1 = cast int %a to ulong ; <ulong> [#uses=1]
%tmp.3 = cast int %b to ulong ; <ulong> [#uses=1]
%tmp.4 = mul ulong %tmp.3, %tmp.1 ; <ulong> [#uses=1]
%tmp.6 = shr ulong %tmp.4, ubyte 32 ; <ulong> [#uses=1]
%tmp.7 = cast ulong %tmp.6 to int ; <int> [#uses=1]
ret int %tmp.7
%tmp.1 = sext i32 %a to i64 ; <i64> [#uses=1]
%tmp.3 = sext i32 %b to i64 ; <i64> [#uses=1]
%tmp.4 = mul i64 %tmp.3, %tmp.1 ; <i64> [#uses=1]
%tmp.6 = lshr i64 %tmp.4, 32 ; <i64> [#uses=1]
%tmp.7 = trunc i64 %tmp.6 to i32 ; <i32> [#uses=1]
ret i32 %tmp.7
}

View File

@ -1,6 +1,7 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep neg
; RUN: llvm-as < %s | llc -march=ppc32 | grep neg
int %test(int %X) {
%Y = sub int 0, %X
ret int %Y
define i32 @test(i32 %X) {
%Y = sub i32 0, %X ; <i32> [#uses=1]
ret i32 %Y
}

View File

@ -1,22 +1,22 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -mtriple=powerpc-apple-darwin8 | not grep ori
; RUN: llvm-upgrade < %s | llvm-as | llc -mtriple=powerpc-apple-darwin8 | not grep rlwimi
; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin8 | not grep ori
; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin8 | not grep rlwimi
int %test1(sbyte* %P) { ;; or -> lwzx
%tmp.2.i = cast sbyte* %P to uint
%tmp.4.i = and uint %tmp.2.i, 4294901760
%tmp.10.i = shr uint %tmp.2.i, ubyte 5
%tmp.11.i = and uint %tmp.10.i, 2040
%tmp.13.i = or uint %tmp.11.i, %tmp.4.i
%tmp.14.i = cast uint %tmp.13.i to int*
%tmp.3 = load int* %tmp.14.i
ret int %tmp.3
define i32 @test1(i8* %P) {
%tmp.2.i = ptrtoint i8* %P to i32 ; <i32> [#uses=2]
%tmp.4.i = and i32 %tmp.2.i, -65536 ; <i32> [#uses=1]
%tmp.10.i = lshr i32 %tmp.2.i, 5 ; <i32> [#uses=1]
%tmp.11.i = and i32 %tmp.10.i, 2040 ; <i32> [#uses=1]
%tmp.13.i = or i32 %tmp.11.i, %tmp.4.i ; <i32> [#uses=1]
%tmp.14.i = inttoptr i32 %tmp.13.i to i32* ; <i32*> [#uses=1]
%tmp.3 = load i32* %tmp.14.i ; <i32> [#uses=1]
ret i32 %tmp.3
}
int %test2(int %P) { ;; or -> lwz
%tmp.2 = shl int %P, ubyte 4
%tmp.3 = or int %tmp.2, 2
%tmp.4 = cast int %tmp.3 to int*
%tmp.5 = load int* %tmp.4
ret int %tmp.5
define i32 @test2(i32 %P) {
%tmp.2 = shl i32 %P, 4 ; <i32> [#uses=1]
%tmp.3 = or i32 %tmp.2, 2 ; <i32> [#uses=1]
%tmp.4 = inttoptr i32 %tmp.3 to i32* ; <i32*> [#uses=1]
%tmp.5 = load i32* %tmp.4 ; <i32> [#uses=1]
ret i32 %tmp.5
}

View File

@ -1,11 +1,12 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep or
%struct.foo = type { int, int, [0 x ubyte] }
int %test(%struct.foo* %X) {
%tmp1 = getelementptr %struct.foo* %X, int 0, uint 2, int 100
%tmp = load ubyte* %tmp1 ; <ubyte> [#uses=1]
%tmp2 = cast ubyte %tmp to int ; <int> [#uses=1]
ret int %tmp2}
; RUN: llvm-as < %s | llc -march=ppc32 | not grep or
%struct.foo = type { i32, i32, [0 x i8] }
define i32 @test(%struct.foo* %X) {
%tmp1 = getelementptr %struct.foo* %X, i32 0, i32 2, i32 100 ; <i8*> [#uses=1]
%tmp = load i8* %tmp1 ; <i8> [#uses=1]
%tmp2 = zext i8 %tmp to i32 ; <i32> [#uses=1]
ret i32 %tmp2
}

View File

@ -1,26 +1,26 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep rlwimi
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep {or }
; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwimi
; RUN: llvm-as < %s | llc -march=ppc32 | not grep {or }
; Make sure there is no register-register copies here.
void %test1(int *%A, int *%B, int *%D, int* %E) {
%A = load int* %A
%B = load int* %B
%X = and int %A, 15
%Y = and int %B, -16
%Z = or int %X, %Y
store int %Z, int* %D
store int %A, int* %E
define void @test1(i32* %A, i32* %B, i32* %D, i32* %E) {
%A.upgrd.1 = load i32* %A ; <i32> [#uses=2]
%B.upgrd.2 = load i32* %B ; <i32> [#uses=1]
%X = and i32 %A.upgrd.1, 15 ; <i32> [#uses=1]
%Y = and i32 %B.upgrd.2, -16 ; <i32> [#uses=1]
%Z = or i32 %X, %Y ; <i32> [#uses=1]
store i32 %Z, i32* %D
store i32 %A.upgrd.1, i32* %E
ret void
}
void %test2(int *%A, int *%B, int *%D, int* %E) {
%A = load int* %A
%B = load int* %B
%X = and int %A, 15
%Y = and int %B, -16
%Z = or int %X, %Y
store int %Z, int* %D
store int %B, int* %E
define void @test2(i32* %A, i32* %B, i32* %D, i32* %E) {
%A.upgrd.3 = load i32* %A ; <i32> [#uses=1]
%B.upgrd.4 = load i32* %B ; <i32> [#uses=2]
%X = and i32 %A.upgrd.3, 15 ; <i32> [#uses=1]
%Y = and i32 %B.upgrd.4, -16 ; <i32> [#uses=1]
%Z = or i32 %X, %Y ; <i32> [#uses=1]
store i32 %Z, i32* %D
store i32 %B.upgrd.4, i32* %E
ret void
}

View File

@ -1,72 +1,70 @@
; All of these ands and shifts should be folded into rlwimi's
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep and
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep rlwimi | count 8
; RUN: llvm-as < %s | llc -march=ppc32 | not grep and
; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwimi | count 8
implementation ; Functions:
int %test1(int %x, int %y) {
define i32 @test1(i32 %x, i32 %y) {
entry:
%tmp.3 = shl int %x, ubyte 16 ; <int> [#uses=1]
%tmp.7 = and int %y, 65535 ; <int> [#uses=1]
%tmp.9 = or int %tmp.7, %tmp.3 ; <int> [#uses=1]
ret int %tmp.9
%tmp.3 = shl i32 %x, 16 ; <i32> [#uses=1]
%tmp.7 = and i32 %y, 65535 ; <i32> [#uses=1]
%tmp.9 = or i32 %tmp.7, %tmp.3 ; <i32> [#uses=1]
ret i32 %tmp.9
}
int %test2(int %x, int %y) {
define i32 @test2(i32 %x, i32 %y) {
entry:
%tmp.7 = and int %x, 65535 ; <int> [#uses=1]
%tmp.3 = shl int %y, ubyte 16 ; <int> [#uses=1]
%tmp.9 = or int %tmp.7, %tmp.3 ; <int> [#uses=1]
ret int %tmp.9
%tmp.7 = and i32 %x, 65535 ; <i32> [#uses=1]
%tmp.3 = shl i32 %y, 16 ; <i32> [#uses=1]
%tmp.9 = or i32 %tmp.7, %tmp.3 ; <i32> [#uses=1]
ret i32 %tmp.9
}
uint %test3(uint %x, uint %y) {
define i32 @test3(i32 %x, i32 %y) {
entry:
%tmp.3 = shr uint %x, ubyte 16 ; <uint> [#uses=1]
%tmp.6 = and uint %y, 4294901760 ; <uint> [#uses=1]
%tmp.7 = or uint %tmp.6, %tmp.3 ; <uint> [#uses=1]
ret uint %tmp.7
%tmp.3 = lshr i32 %x, 16 ; <i32> [#uses=1]
%tmp.6 = and i32 %y, -65536 ; <i32> [#uses=1]
%tmp.7 = or i32 %tmp.6, %tmp.3 ; <i32> [#uses=1]
ret i32 %tmp.7
}
uint %test4(uint %x, uint %y) {
define i32 @test4(i32 %x, i32 %y) {
entry:
%tmp.6 = and uint %x, 4294901760 ; <uint> [#uses=1]
%tmp.3 = shr uint %y, ubyte 16 ; <uint> [#uses=1]
%tmp.7 = or uint %tmp.6, %tmp.3 ; <uint> [#uses=1]
ret uint %tmp.7
%tmp.6 = and i32 %x, -65536 ; <i32> [#uses=1]
%tmp.3 = lshr i32 %y, 16 ; <i32> [#uses=1]
%tmp.7 = or i32 %tmp.6, %tmp.3 ; <i32> [#uses=1]
ret i32 %tmp.7
}
int %test5(int %x, int %y) {
define i32 @test5(i32 %x, i32 %y) {
entry:
%tmp.3 = shl int %x, ubyte 1 ; <int> [#uses=1]
%tmp.4 = and int %tmp.3, -65536 ; <int> [#uses=1]
%tmp.7 = and int %y, 65535 ; <int> [#uses=1]
%tmp.9 = or int %tmp.4, %tmp.7 ; <int> [#uses=1]
ret int %tmp.9
%tmp.3 = shl i32 %x, 1 ; <i32> [#uses=1]
%tmp.4 = and i32 %tmp.3, -65536 ; <i32> [#uses=1]
%tmp.7 = and i32 %y, 65535 ; <i32> [#uses=1]
%tmp.9 = or i32 %tmp.4, %tmp.7 ; <i32> [#uses=1]
ret i32 %tmp.9
}
int %test6(int %x, int %y) {
define i32 @test6(i32 %x, i32 %y) {
entry:
%tmp.7 = and int %x, 65535 ; <int> [#uses=1]
%tmp.3 = shl int %y, ubyte 1 ; <int> [#uses=1]
%tmp.4 = and int %tmp.3, -65536 ; <int> [#uses=1]
%tmp.9 = or int %tmp.4, %tmp.7 ; <int> [#uses=1]
ret int %tmp.9
%tmp.7 = and i32 %x, 65535 ; <i32> [#uses=1]
%tmp.3 = shl i32 %y, 1 ; <i32> [#uses=1]
%tmp.4 = and i32 %tmp.3, -65536 ; <i32> [#uses=1]
%tmp.9 = or i32 %tmp.4, %tmp.7 ; <i32> [#uses=1]
ret i32 %tmp.9
}
int %test7(int %x, int %y) {
define i32 @test7(i32 %x, i32 %y) {
entry:
%tmp.2 = and int %x, -65536 ; <int> [#uses=1]
%tmp.5 = and int %y, 65535 ; <int> [#uses=1]
%tmp.7 = or int %tmp.5, %tmp.2 ; <int> [#uses=1]
ret int %tmp.7
%tmp.2 = and i32 %x, -65536 ; <i32> [#uses=1]
%tmp.5 = and i32 %y, 65535 ; <i32> [#uses=1]
%tmp.7 = or i32 %tmp.5, %tmp.2 ; <i32> [#uses=1]
ret i32 %tmp.7
}
uint %test8(uint %bar) {
define i32 @test8(i32 %bar) {
entry:
%tmp.3 = shl uint %bar, ubyte 1 ; <uint> [#uses=1]
%tmp.4 = and uint %tmp.3, 2 ; <uint> [#uses=1]
%tmp.6 = and uint %bar, 4294967293 ; <uint> [#uses=1]
%tmp.7 = or uint %tmp.4, %tmp.6 ; <uint> [#uses=1]
ret uint %tmp.7
%tmp.3 = shl i32 %bar, 1 ; <i32> [#uses=1]
%tmp.4 = and i32 %tmp.3, 2 ; <i32> [#uses=1]
%tmp.6 = and i32 %bar, -3 ; <i32> [#uses=1]
%tmp.7 = or i32 %tmp.4, %tmp.6 ; <i32> [#uses=1]
ret i32 %tmp.7
}

View File

@ -1,31 +1,29 @@
; All of these ands and shifts should be folded into rlwimi's
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -o %t -f
; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
; RUN: grep rlwimi %t | count 3
; RUN: grep srwi %t | count 1
; RUN: not grep slwi %t
implementation ; Functions:
ushort %test1(uint %srcA, uint %srcB, uint %alpha) {
define i16 @test1(i32 %srcA, i32 %srcB, i32 %alpha) {
entry:
%tmp.1 = shl uint %srcA, ubyte 15 ; <uint> [#uses=1]
%tmp.4 = and uint %tmp.1, 32505856 ; <uint> [#uses=1]
%tmp.6 = and uint %srcA, 31775 ; <uint> [#uses=1]
%tmp.7 = or uint %tmp.4, %tmp.6 ; <uint> [#uses=1]
%tmp.9 = shl uint %srcB, ubyte 15 ; <uint> [#uses=1]
%tmp.12 = and uint %tmp.9, 32505856 ; <uint> [#uses=1]
%tmp.14 = and uint %srcB, 31775 ; <uint> [#uses=1]
%tmp.15 = or uint %tmp.12, %tmp.14 ; <uint> [#uses=1]
%tmp.18 = mul uint %tmp.7, %alpha ; <uint> [#uses=1]
%tmp.20 = sub uint 32, %alpha ; <uint> [#uses=1]
%tmp.22 = mul uint %tmp.15, %tmp.20 ; <uint> [#uses=1]
%tmp.23 = add uint %tmp.22, %tmp.18 ; <uint> [#uses=2]
%tmp.27 = shr uint %tmp.23, ubyte 5 ; <uint> [#uses=1]
%tmp.28 = cast uint %tmp.27 to ushort ; <ushort> [#uses=1]
%tmp.29 = and ushort %tmp.28, 31775 ; <ushort> [#uses=1]
%tmp.33 = shr uint %tmp.23, ubyte 20 ; <uint> [#uses=1]
%tmp.34 = cast uint %tmp.33 to ushort ; <ushort> [#uses=1]
%tmp.35 = and ushort %tmp.34, 992 ; <ushort> [#uses=1]
%tmp.36 = or ushort %tmp.29, %tmp.35 ; <ushort> [#uses=1]
ret ushort %tmp.36
%tmp.1 = shl i32 %srcA, 15 ; <i32> [#uses=1]
%tmp.4 = and i32 %tmp.1, 32505856 ; <i32> [#uses=1]
%tmp.6 = and i32 %srcA, 31775 ; <i32> [#uses=1]
%tmp.7 = or i32 %tmp.4, %tmp.6 ; <i32> [#uses=1]
%tmp.9 = shl i32 %srcB, 15 ; <i32> [#uses=1]
%tmp.12 = and i32 %tmp.9, 32505856 ; <i32> [#uses=1]
%tmp.14 = and i32 %srcB, 31775 ; <i32> [#uses=1]
%tmp.15 = or i32 %tmp.12, %tmp.14 ; <i32> [#uses=1]
%tmp.18 = mul i32 %tmp.7, %alpha ; <i32> [#uses=1]
%tmp.20 = sub i32 32, %alpha ; <i32> [#uses=1]
%tmp.22 = mul i32 %tmp.15, %tmp.20 ; <i32> [#uses=1]
%tmp.23 = add i32 %tmp.22, %tmp.18 ; <i32> [#uses=2]
%tmp.27 = lshr i32 %tmp.23, 5 ; <i32> [#uses=1]
%tmp.28 = trunc i32 %tmp.27 to i16 ; <i16> [#uses=1]
%tmp.29 = and i16 %tmp.28, 31775 ; <i16> [#uses=1]
%tmp.33 = lshr i32 %tmp.23, 20 ; <i32> [#uses=1]
%tmp.34 = trunc i32 %tmp.33 to i16 ; <i16> [#uses=1]
%tmp.35 = and i16 %tmp.34, 992 ; <i16> [#uses=1]
%tmp.36 = or i16 %tmp.29, %tmp.35 ; <i16> [#uses=1]
ret i16 %tmp.36
}

View File

@ -1,26 +1,25 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -stats |& \
; RUN: llvm-as < %s | llc -march=ppc32 -stats |& \
; RUN: grep {Number of machine instrs printed} | grep 12
ushort %Trans16Bit(uint %srcA, uint %srcB, uint %alpha) {
%tmp1 = shl uint %srcA, ubyte 15 ; <uint> [#uses=1]
%tmp2 = and uint %tmp1, 32505856 ; <uint> [#uses=1]
%tmp4 = and uint %srcA, 31775 ; <uint> [#uses=1]
%tmp5 = or uint %tmp2, %tmp4 ; <uint> [#uses=1]
%tmp7 = shl uint %srcB, ubyte 15 ; <uint> [#uses=1]
%tmp8 = and uint %tmp7, 32505856 ; <uint> [#uses=1]
%tmp10 = and uint %srcB, 31775 ; <uint> [#uses=1]
%tmp11 = or uint %tmp8, %tmp10 ; <uint> [#uses=1]
%tmp14 = mul uint %tmp5, %alpha ; <uint> [#uses=1]
%tmp16 = sub uint 32, %alpha ; <uint> [#uses=1]
%tmp18 = mul uint %tmp11, %tmp16 ; <uint> [#uses=1]
%tmp19 = add uint %tmp18, %tmp14 ; <uint> [#uses=2]
%tmp21 = shr uint %tmp19, ubyte 5 ; <uint> [#uses=1]
%tmp21 = cast uint %tmp21 to ushort ; <ushort> [#uses=1]
%tmp = and ushort %tmp21, 31775 ; <ushort> [#uses=1]
%tmp23 = shr uint %tmp19, ubyte 20 ; <uint> [#uses=1]
%tmp23 = cast uint %tmp23 to ushort ; <ushort> [#uses=1]
%tmp24 = and ushort %tmp23, 992 ; <ushort> [#uses=1]
%tmp25 = or ushort %tmp, %tmp24 ; <ushort> [#uses=1]
ret ushort %tmp25
define i16 @Trans16Bit(i32 %srcA, i32 %srcB, i32 %alpha) {
%tmp1 = shl i32 %srcA, 15 ; <i32> [#uses=1]
%tmp2 = and i32 %tmp1, 32505856 ; <i32> [#uses=1]
%tmp4 = and i32 %srcA, 31775 ; <i32> [#uses=1]
%tmp5 = or i32 %tmp2, %tmp4 ; <i32> [#uses=1]
%tmp7 = shl i32 %srcB, 15 ; <i32> [#uses=1]
%tmp8 = and i32 %tmp7, 32505856 ; <i32> [#uses=1]
%tmp10 = and i32 %srcB, 31775 ; <i32> [#uses=1]
%tmp11 = or i32 %tmp8, %tmp10 ; <i32> [#uses=1]
%tmp14 = mul i32 %tmp5, %alpha ; <i32> [#uses=1]
%tmp16 = sub i32 32, %alpha ; <i32> [#uses=1]
%tmp18 = mul i32 %tmp11, %tmp16 ; <i32> [#uses=1]
%tmp19 = add i32 %tmp18, %tmp14 ; <i32> [#uses=2]
%tmp21 = lshr i32 %tmp19, 5 ; <i32> [#uses=1]
%tmp21.upgrd.1 = trunc i32 %tmp21 to i16 ; <i16> [#uses=1]
%tmp = and i16 %tmp21.upgrd.1, 31775 ; <i16> [#uses=1]
%tmp23 = lshr i32 %tmp19, 20 ; <i32> [#uses=1]
%tmp23.upgrd.2 = trunc i32 %tmp23 to i16 ; <i16> [#uses=1]
%tmp24 = and i16 %tmp23.upgrd.2, 992 ; <i16> [#uses=1]
%tmp25 = or i16 %tmp, %tmp24 ; <i16> [#uses=1]
ret i16 %tmp25
}

View File

@ -1,64 +1,61 @@
; All of these ands and shifts should be folded into rlwimi's
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -o %t -f
; RUN: not grep and %t
; RUN: not grep srawi %t
; RUN: not grep srwi %t
; RUN: not grep slwi %t
; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
; RUN: not grep and %t
; RUN: not grep srawi %t
; RUN: not grep srwi %t
; RUN: not grep slwi %t
; RUN: grep rlwinm %t | count 8
implementation ; Functions:
int %test1(int %a) {
define i32 @test1(i32 %a) {
entry:
%tmp.1 = and int %a, 268431360 ; <int> [#uses=1]
ret int %tmp.1
%tmp.1 = and i32 %a, 268431360 ; <i32> [#uses=1]
ret i32 %tmp.1
}
int %test2(int %a) {
define i32 @test2(i32 %a) {
entry:
%tmp.1 = and int %a, -268435441 ; <int> [#uses=1]
ret int %tmp.1
%tmp.1 = and i32 %a, -268435441 ; <i32> [#uses=1]
ret i32 %tmp.1
}
int %test3(int %a) {
define i32 @test3(i32 %a) {
entry:
%tmp.2 = shr int %a, ubyte 8 ; <int> [#uses=1]
%tmp.3 = and int %tmp.2, 255 ; <int> [#uses=1]
ret int %tmp.3
%tmp.2 = ashr i32 %a, 8 ; <i32> [#uses=1]
%tmp.3 = and i32 %tmp.2, 255 ; <i32> [#uses=1]
ret i32 %tmp.3
}
uint %test4(uint %a) {
define i32 @test4(i32 %a) {
entry:
%tmp.3 = shr uint %a, ubyte 8 ; <uint> [#uses=1]
%tmp.4 = and uint %tmp.3, 255 ; <uint> [#uses=1]
ret uint %tmp.4
%tmp.3 = lshr i32 %a, 8 ; <i32> [#uses=1]
%tmp.4 = and i32 %tmp.3, 255 ; <i32> [#uses=1]
ret i32 %tmp.4
}
int %test5(int %a) {
define i32 @test5(i32 %a) {
entry:
%tmp.2 = shl int %a, ubyte 8 ; <int> [#uses=1]
%tmp.3 = and int %tmp.2, -8388608 ; <int> [#uses=1]
ret int %tmp.3
%tmp.2 = shl i32 %a, 8 ; <i32> [#uses=1]
%tmp.3 = and i32 %tmp.2, -8388608 ; <i32> [#uses=1]
ret i32 %tmp.3
}
int %test6(int %a) {
define i32 @test6(i32 %a) {
entry:
%tmp.1 = and int %a, 65280 ; <int> [#uses=1]
%tmp.2 = shr int %tmp.1, ubyte 8 ; <uint> [#uses=1]
ret int %tmp.2
%tmp.1 = and i32 %a, 65280 ; <i32> [#uses=1]
%tmp.2 = ashr i32 %tmp.1, 8 ; <i32> [#uses=1]
ret i32 %tmp.2
}
uint %test7(uint %a) {
define i32 @test7(i32 %a) {
entry:
%tmp.1 = and uint %a, 65280 ; <uint> [#uses=1]
%tmp.2 = shr uint %tmp.1, ubyte 8 ; <uint> [#uses=1]
ret uint %tmp.2
%tmp.1 = and i32 %a, 65280 ; <i32> [#uses=1]
%tmp.2 = lshr i32 %tmp.1, 8 ; <i32> [#uses=1]
ret i32 %tmp.2
}
int %test8(int %a) {
define i32 @test8(i32 %a) {
entry:
%tmp.1 = and int %a, 16711680 ; <int> [#uses=1]
%tmp.2 = shl int %tmp.1, ubyte 8 ; <int> [#uses=1]
ret int %tmp.2
%tmp.1 = and i32 %a, 16711680 ; <i32> [#uses=1]
%tmp.2 = shl i32 %tmp.1, 8 ; <i32> [#uses=1]
ret i32 %tmp.2
}

View File

@ -1,51 +1,50 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep cmp
; RUN: llvm-as < %s | llc -march=ppc32 | not grep cmp
int %seli32_1(int %a) {
define i32 @seli32_1(i32 %a) {
entry:
%tmp.1 = setlt int %a, 0
%retval = select bool %tmp.1, int 5, int 0
ret int %retval
%tmp.1 = icmp slt i32 %a, 0 ; <i1> [#uses=1]
%retval = select i1 %tmp.1, i32 5, i32 0 ; <i32> [#uses=1]
ret i32 %retval
}
int %seli32_2(int %a, int %b) {
define i32 @seli32_2(i32 %a, i32 %b) {
entry:
%tmp.1 = setlt int %a, 0
%retval = select bool %tmp.1, int %b, int 0
ret int %retval
%tmp.1 = icmp slt i32 %a, 0 ; <i1> [#uses=1]
%retval = select i1 %tmp.1, i32 %b, i32 0 ; <i32> [#uses=1]
ret i32 %retval
}
int %seli32_3(int %a, short %b) {
define i32 @seli32_3(i32 %a, i16 %b) {
entry:
%tmp.2 = cast short %b to int
%tmp.1 = setlt int %a, 0
%retval = select bool %tmp.1, int %tmp.2, int 0
ret int %retval
%tmp.2 = sext i16 %b to i32 ; <i32> [#uses=1]
%tmp.1 = icmp slt i32 %a, 0 ; <i1> [#uses=1]
%retval = select i1 %tmp.1, i32 %tmp.2, i32 0 ; <i32> [#uses=1]
ret i32 %retval
}
int %seli32_4(int %a, ushort %b) {
define i32 @seli32_4(i32 %a, i16 %b) {
entry:
%tmp.2 = cast ushort %b to int
%tmp.1 = setlt int %a, 0
%retval = select bool %tmp.1, int %tmp.2, int 0
ret int %retval
%tmp.2 = zext i16 %b to i32 ; <i32> [#uses=1]
%tmp.1 = icmp slt i32 %a, 0 ; <i1> [#uses=1]
%retval = select i1 %tmp.1, i32 %tmp.2, i32 0 ; <i32> [#uses=1]
ret i32 %retval
}
short %seli16_1(short %a) {
define i16 @seli16_1(i16 %a) {
entry:
%tmp.1 = setlt short %a, 0
%retval = select bool %tmp.1, short 7, short 0
ret short %retval
%tmp.1 = icmp slt i16 %a, 0 ; <i1> [#uses=1]
%retval = select i1 %tmp.1, i16 7, i16 0 ; <i16> [#uses=1]
ret i16 %retval
}
short %seli16_2(int %a, short %b) {
%tmp.1 = setlt int %a, 0
%retval = select bool %tmp.1, short %b, short 0
ret short %retval
define i16 @seli16_2(i32 %a, i16 %b) {
%tmp.1 = icmp slt i32 %a, 0 ; <i1> [#uses=1]
%retval = select i1 %tmp.1, i16 %b, i16 0 ; <i16> [#uses=1]
ret i16 %retval
}
int %seli32_a_a(int %a) {
%tmp = setlt int %a, 1
%min = select bool %tmp, int %a, int 0
ret int %min
define i32 @seli32_a_a(i32 %a) {
%tmp = icmp slt i32 %a, 1 ; <i1> [#uses=1]
%min = select i1 %tmp, i32 %a, i32 0 ; <i32> [#uses=1]
ret i32 %min
}

View File

@ -1,8 +1,9 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep rlwinm
; RUN: llvm-as < %s | llc -march=ppc32 | not grep rlwinm
int %setcc_one_or_zero(int* %a) {
define i32 @setcc_one_or_zero(i32* %a) {
entry:
%tmp.1 = setne int* %a, null
%inc.1 = cast bool %tmp.1 to int
ret int %inc.1
%tmp.1 = icmp ne i32* %a, null ; <i1> [#uses=1]
%inc.1 = zext i1 %tmp.1 to i32 ; <i32> [#uses=1]
ret i32 %inc.1
}

View File

@ -1,9 +1,10 @@
; RUN: llvm-upgrade < %s | llvm-as | \
; RUN: llvm-as < %s | \
; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
; RUN: grep {srwi r., r., 5}
int %eq0(int %a) {
%tmp.1 = seteq int %a, 0 ; <bool> [#uses=1]
%tmp.2 = cast bool %tmp.1 to int ; <int> [#uses=1]
ret int %tmp.2
define i32 @eq0(i32 %a) {
%tmp.1 = icmp eq i32 %a, 0 ; <i1> [#uses=1]
%tmp.2 = zext i1 %tmp.1 to i32 ; <i32> [#uses=1]
ret i32 %tmp.2
}

View File

@ -1,17 +1,18 @@
; This test should not contain a sign extend
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep extsb
; RUN: llvm-as < %s | llc -march=ppc32 | not grep extsb
int %test(uint %mode.0.i.0) {
%tmp.79 = cast uint %mode.0.i.0 to sbyte ; <sbyte> [#uses=1]
%tmp.80 = cast sbyte %tmp.79 to int ; <int> [#uses=1]
%tmp.81 = shl int %tmp.80, ubyte 24 ; <int> [#uses=1]
ret int %tmp.81
define i32 @test(i32 %mode.0.i.0) {
%tmp.79 = trunc i32 %mode.0.i.0 to i8 ; <i8> [#uses=1]
%tmp.80 = sext i8 %tmp.79 to i32 ; <i32> [#uses=1]
%tmp.81 = shl i32 %tmp.80, 24 ; <i32> [#uses=1]
ret i32 %tmp.81
}
int %test2(uint %mode.0.i.0) {
%tmp.79 = cast uint %mode.0.i.0 to sbyte ; <sbyte> [#uses=1]
%tmp.80 = cast sbyte %tmp.79 to int ; <int> [#uses=1]
%tmp.81 = shl int %tmp.80, ubyte 16 ; <int> [#uses=1]
%tmp.82 = and int %tmp.81, 16711680
ret int %tmp.82
define i32 @test2(i32 %mode.0.i.0) {
%tmp.79 = trunc i32 %mode.0.i.0 to i8 ; <i8> [#uses=1]
%tmp.80 = sext i8 %tmp.79 to i32 ; <i32> [#uses=1]
%tmp.81 = shl i32 %tmp.80, 16 ; <i32> [#uses=1]
%tmp.82 = and i32 %tmp.81, 16711680 ; <i32> [#uses=1]
ret i32 %tmp.82
}

View File

@ -1,26 +1,26 @@
; RUN: llvm-upgrade < %s | llvm-as | \
; RUN: llvm-as < %s | \
; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=stfiwx -o %t1 -f
; RUN: grep stfiwx %t1
; RUN: not grep r1 %t1
; RUN: llvm-upgrade < %s | llvm-as | \
; RUN: llvm-as < %s | \
; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-stfiwx \
; RUN: -o %t2 -f
; RUN: not grep stfiwx %t2
; RUN: grep r1 %t2
void %test(float %a, int* %b) {
%tmp.2 = cast float %a to int
store int %tmp.2, int* %b
define void @test(float %a, i32* %b) {
%tmp.2 = fptosi float %a to i32 ; <i32> [#uses=1]
store i32 %tmp.2, i32* %b
ret void
}
void %test2(float %a, int* %b, int %i) {
%tmp.2 = getelementptr int* %b, int 1
%tmp.5 = getelementptr int* %b, int %i
%tmp.7 = cast float %a to int
store int %tmp.7, int* %tmp.5
store int %tmp.7, int* %tmp.2
store int %tmp.7, int* %b
define void @test2(float %a, i32* %b, i32 %i) {
%tmp.2 = getelementptr i32* %b, i32 1 ; <i32*> [#uses=1]
%tmp.5 = getelementptr i32* %b, i32 %i ; <i32*> [#uses=1]
%tmp.7 = fptosi float %a to i32 ; <i32> [#uses=3]
store i32 %tmp.7, i32* %tmp.5
store i32 %tmp.7, i32* %tmp.2
store i32 %tmp.7, i32* %b
ret void
}

View File

@ -1,6 +1,8 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep lwz
int %test(int* %P) {
store int 1, int* %P
%V = load int* %P
ret int %V
; RUN: llvm-as < %s | llc -march=ppc32 | not grep lwz
define i32 @test(i32* %P) {
store i32 1, i32* %P
%V = load i32* %P ; <i32> [#uses=1]
ret i32 %V
}

View File

@ -1,26 +1,25 @@
; All of these should be codegen'd without loading immediates
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -o %t -f
; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
; RUN: grep subfc %t | count 1
; RUN: grep subfe %t | count 1
; RUN: grep subfze %t | count 1
; RUN: grep subfme %t | count 1
; RUN: grep subfic %t | count 2
implementation ; Functions:
long %sub_ll(long %a, long %b) {
define i64 @sub_ll(i64 %a, i64 %b) {
entry:
%tmp.2 = sub long %a, %b ; <long> [#uses=1]
ret long %tmp.2
%tmp.2 = sub i64 %a, %b ; <i64> [#uses=1]
ret i64 %tmp.2
}
long %sub_l_5(long %a) {
define i64 @sub_l_5(i64 %a) {
entry:
%tmp.1 = sub long 5, %a ; <long> [#uses=1]
ret long %tmp.1
%tmp.1 = sub i64 5, %a ; <i64> [#uses=1]
ret i64 %tmp.1
}
long %sub_l_m5(long %a) {
define i64 @sub_l_m5(i64 %a) {
entry:
%tmp.1 = sub long -5, %a ; <long> [#uses=1]
ret long %tmp.1
%tmp.1 = sub i64 -5, %a ; <i64> [#uses=1]
ret i64 %tmp.1
}

View File

@ -1,21 +1,22 @@
; This should fold the "vcmpbfp." and "vcmpbfp" instructions into a single
; This should fold the "vcmpbfp." and "vcmpbfp" instructions into a single
; "vcmpbfp.".
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep vcmpbfp | count 1
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vcmpbfp | count 1
void %test(<4 x float>* %x, <4 x float>* %y, int* %P) {
define void @test(<4 x float>* %x, <4 x float>* %y, i32* %P) {
entry:
%tmp = load <4 x float>* %x ; <<4 x float>> [#uses=1]
%tmp2 = load <4 x float>* %y ; <<4 x float>> [#uses=1]
%tmp = call int %llvm.ppc.altivec.vcmpbfp.p( int 1, <4 x float> %tmp, <4 x float> %tmp2 ) ; <int> [#uses=1]
%tmp.upgrd.1 = call i32 @llvm.ppc.altivec.vcmpbfp.p( i32 1, <4 x float> %tmp, <4 x float> %tmp2 ) ; <i32> [#uses=1]
%tmp4 = load <4 x float>* %x ; <<4 x float>> [#uses=1]
%tmp6 = load <4 x float>* %y ; <<4 x float>> [#uses=1]
%tmp = call <4 x int> %llvm.ppc.altivec.vcmpbfp( <4 x float> %tmp4, <4 x float> %tmp6 ) ; <<4 x int>> [#uses=1]
%tmp7 = cast <4 x int> %tmp to <4 x float> ; <<4 x float>> [#uses=1]
%tmp.upgrd.2 = call <4 x i32> @llvm.ppc.altivec.vcmpbfp( <4 x float> %tmp4, <4 x float> %tmp6 ) ; <<4 x i32>> [#uses=1]
%tmp7 = bitcast <4 x i32> %tmp.upgrd.2 to <4 x float> ; <<4 x float>> [#uses=1]
store <4 x float> %tmp7, <4 x float>* %x
store int %tmp, int* %P
store i32 %tmp.upgrd.1, i32* %P
ret void
}
declare int %llvm.ppc.altivec.vcmpbfp.p(int, <4 x float>, <4 x float>)
declare i32 @llvm.ppc.altivec.vcmpbfp.p(i32, <4 x float>, <4 x float>)
declare <4 x int> %llvm.ppc.altivec.vcmpbfp(<4 x float>, <4 x float>)
declare <4 x i32> @llvm.ppc.altivec.vcmpbfp(<4 x float>, <4 x float>)

View File

@ -1,23 +1,22 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 -o %t -f
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 -o %t -f
; RUN: grep vcmpeqfp. %t
; RUN: not grep mfcr %t
; A predicate compare used immediately by a branch should not generate an mfcr.
void %test(<4 x float>* %A, <4 x float>* %B) {
%tmp = load <4 x float>* %A
%tmp3 = load <4 x float>* %B
%tmp = tail call int %llvm.ppc.altivec.vcmpeqfp.p( int 1, <4 x float> %tmp, <4 x float> %tmp3 )
%tmp = seteq int %tmp, 0
br bool %tmp, label %cond_true, label %UnifiedReturnBlock
define void @test(<4 x float>* %A, <4 x float>* %B) {
%tmp = load <4 x float>* %A ; <<4 x float>> [#uses=1]
%tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=1]
%tmp.upgrd.1 = tail call i32 @llvm.ppc.altivec.vcmpeqfp.p( i32 1, <4 x float> %tmp, <4 x float> %tmp3 ) ; <i32> [#uses=1]
%tmp.upgrd.2 = icmp eq i32 %tmp.upgrd.1, 0 ; <i1> [#uses=1]
br i1 %tmp.upgrd.2, label %cond_true, label %UnifiedReturnBlock
cond_true:
store <4 x float> zeroinitializer, <4 x float>* %B
ret void
cond_true: ; preds = %0
store <4 x float> zeroinitializer, <4 x float>* %B
ret void
UnifiedReturnBlock:
ret void
UnifiedReturnBlock: ; preds = %0
ret void
}
declare int %llvm.ppc.altivec.vcmpeqfp.p(int, <4 x float>, <4 x float>)
declare i32 @llvm.ppc.altivec.vcmpeqfp.p(i32, <4 x float>, <4 x float>)

View File

@ -1,11 +1,11 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
<4 x int> %test_arg(<4 x int> %A, <4 x int> %B) {
%C = add <4 x int> %A, %B
ret <4 x int> %C
define <4 x i32> @test_arg(<4 x i32> %A, <4 x i32> %B) {
%C = add <4 x i32> %A, %B ; <<4 x i32>> [#uses=1]
ret <4 x i32> %C
}
<4 x int> %foo() {
%X = call <4 x int> %test_arg(<4 x int> zeroinitializer, <4 x int> zeroinitializer)
ret <4 x int> %X
define <4 x i32> @foo() {
%X = call <4 x i32> @test_arg( <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %X
}

View File

@ -1,47 +1,40 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | not grep CPI
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep CPI
; Tests spltw(0x80000000) and spltw(0x7FFFFFFF).
void %test1(<4 x int>* %P1, <4 x int>* %P2, <4 x float>* %P3) {
%tmp = load <4 x int>* %P1
%tmp4 = and <4 x int> %tmp, < int -2147483648, int -2147483648, int -2147483648, int -2147483648 >
store <4 x int> %tmp4, <4 x int>* %P1
%tmp7 = load <4 x int>* %P2
%tmp9 = and <4 x int> %tmp7, < int 2147483647, int 2147483647, int 2147483647, int 2147483647 >
store <4 x int> %tmp9, <4 x int>* %P2
%tmp = load <4 x float>* %P3
%tmp11 = cast <4 x float> %tmp to <4 x int>
%tmp12 = and <4 x int> %tmp11, < int 2147483647, int 2147483647, int 2147483647, int 2147483647 >
%tmp13 = cast <4 x int> %tmp12 to <4 x float>
store <4 x float> %tmp13, <4 x float>* %P3
ret void
define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) {
%tmp = load <4 x i32>* %P1 ; <<4 x i32>> [#uses=1]
%tmp4 = and <4 x i32> %tmp, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1]
store <4 x i32> %tmp4, <4 x i32>* %P1
%tmp7 = load <4 x i32>* %P2 ; <<4 x i32>> [#uses=1]
%tmp9 = and <4 x i32> %tmp7, < i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647 > ; <<4 x i32>> [#uses=1]
store <4 x i32> %tmp9, <4 x i32>* %P2
%tmp.upgrd.1 = load <4 x float>* %P3 ; <<4 x float>> [#uses=1]
%tmp11 = bitcast <4 x float> %tmp.upgrd.1 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp12 = and <4 x i32> %tmp11, < i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647 > ; <<4 x i32>> [#uses=1]
%tmp13 = bitcast <4 x i32> %tmp12 to <4 x float> ; <<4 x float>> [#uses=1]
store <4 x float> %tmp13, <4 x float>* %P3
ret void
}
<4 x int> %test_30() {
ret <4 x int> <int 30, int 30, int 30, int 30>
define <4 x i32> @test_30() {
ret <4 x i32> < i32 30, i32 30, i32 30, i32 30 >
}
<4 x int> %test_29() {
ret <4 x int> <int 29, int 29, int 29, int 29>
define <4 x i32> @test_29() {
ret <4 x i32> < i32 29, i32 29, i32 29, i32 29 >
}
<8 x short> %test_n30() {
ret <8 x short> <short -30, short -30, short -30, short -30,
short -30, short -30, short -30, short -30>
define <8 x i16> @test_n30() {
ret <8 x i16> < i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30 >
}
<16 x sbyte> %test_n104() {
ret <16 x sbyte> <sbyte -104, sbyte -104, sbyte -104, sbyte -104,
sbyte -104, sbyte -104, sbyte -104, sbyte -104,
sbyte -104, sbyte -104, sbyte -104, sbyte -104,
sbyte -104, sbyte -104, sbyte -104, sbyte -104>
define <16 x i8> @test_n104() {
ret <16 x i8> < i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104 >
}
<4 x int> %test_vsldoi() {
ret <4 x int> <int 512, int 512, int 512, int 512>
define <4 x i32> @test_vsldoi() {
ret <4 x i32> < i32 512, i32 512, i32 512, i32 512 >
}
<4 x int> %test_rol() {
ret <4 x int> <int -11534337, int -11534337, int -11534337, int -11534337>
define <4 x i32> @test_rol() {
ret <4 x i32> < i32 -11534337, i32 -11534337, i32 -11534337, i32 -11534337 >
}

View File

@ -1,24 +1,23 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | not grep mullw
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep vmsumuhm
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep mullw
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vmsumuhm
<4 x int> %test_v4i32(<4 x int>* %X, <4 x int>* %Y) {
%tmp = load <4 x int>* %X
%tmp2 = load <4 x int>* %Y
%tmp3 = mul <4 x int> %tmp, %tmp2
ret <4 x int> %tmp3
define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) {
%tmp = load <4 x i32>* %X ; <<4 x i32>> [#uses=1]
%tmp2 = load <4 x i32>* %Y ; <<4 x i32>> [#uses=1]
%tmp3 = mul <4 x i32> %tmp, %tmp2 ; <<4 x i32>> [#uses=1]
ret <4 x i32> %tmp3
}
<8 x short> %test_v8i16(<8 x short>* %X, <8 x short>* %Y) {
%tmp = load <8 x short>* %X
%tmp2 = load <8 x short>* %Y
%tmp3 = mul <8 x short> %tmp, %tmp2
ret <8 x short> %tmp3
define <8 x i16> @test_v8i16(<8 x i16>* %X, <8 x i16>* %Y) {
%tmp = load <8 x i16>* %X ; <<8 x i16>> [#uses=1]
%tmp2 = load <8 x i16>* %Y ; <<8 x i16>> [#uses=1]
%tmp3 = mul <8 x i16> %tmp, %tmp2 ; <<8 x i16>> [#uses=1]
ret <8 x i16> %tmp3
}
<16 x sbyte> %test_v16i8(<16 x sbyte>* %X, <16 x sbyte>* %Y) {
%tmp = load <16 x sbyte>* %X
%tmp2 = load <16 x sbyte>* %Y
%tmp3 = mul <16 x sbyte> %tmp, %tmp2
ret <16 x sbyte> %tmp3
define <16 x i8> @test_v16i8(<16 x i8>* %X, <16 x i8>* %Y) {
%tmp = load <16 x i8>* %X ; <<16 x i8>> [#uses=1]
%tmp2 = load <16 x i8>* %Y ; <<16 x i8>> [#uses=1]
%tmp3 = mul <16 x i8> %tmp, %tmp2 ; <<16 x i8>> [#uses=1]
ret <16 x i8> %tmp3
}

View File

@ -1,42 +1,36 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | not grep vperm
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep vperm
<4 x float> %test_uu72(<4 x float> *%P1, <4 x float> *%P2) {
%V1 = load <4 x float> *%P1
%V2 = load <4 x float> *%P2
; vmrglw + vsldoi
%V3 = shufflevector <4 x float> %V1, <4 x float> %V2,
<4 x uint> <uint undef, uint undef, uint 7, uint 2>
define <4 x float> @test_uu72(<4 x float>* %P1, <4 x float>* %P2) {
%V1 = load <4 x float>* %P1 ; <<4 x float>> [#uses=1]
%V2 = load <4 x float>* %P2 ; <<4 x float>> [#uses=1]
%V3 = shufflevector <4 x float> %V1, <4 x float> %V2, <4 x i32> < i32 undef, i32 undef, i32 7, i32 2 > ; <<4 x float>> [#uses=1]
ret <4 x float> %V3
}
<4 x float> %test_30u5(<4 x float> *%P1, <4 x float> *%P2) {
%V1 = load <4 x float> *%P1
%V2 = load <4 x float> *%P2
%V3 = shufflevector <4 x float> %V1, <4 x float> %V2,
<4 x uint> <uint 3, uint 0, uint undef, uint 5>
define <4 x float> @test_30u5(<4 x float>* %P1, <4 x float>* %P2) {
%V1 = load <4 x float>* %P1 ; <<4 x float>> [#uses=1]
%V2 = load <4 x float>* %P2 ; <<4 x float>> [#uses=1]
%V3 = shufflevector <4 x float> %V1, <4 x float> %V2, <4 x i32> < i32 3, i32 0, i32 undef, i32 5 > ; <<4 x float>> [#uses=1]
ret <4 x float> %V3
}
<4 x float> %test_3u73(<4 x float> *%P1, <4 x float> *%P2) {
%V1 = load <4 x float> *%P1
%V2 = load <4 x float> *%P2
%V3 = shufflevector <4 x float> %V1, <4 x float> %V2,
<4 x uint> <uint 3, uint undef, uint 7, uint 3>
define <4 x float> @test_3u73(<4 x float>* %P1, <4 x float>* %P2) {
%V1 = load <4 x float>* %P1 ; <<4 x float>> [#uses=1]
%V2 = load <4 x float>* %P2 ; <<4 x float>> [#uses=1]
%V3 = shufflevector <4 x float> %V1, <4 x float> %V2, <4 x i32> < i32 3, i32 undef, i32 7, i32 3 > ; <<4 x float>> [#uses=1]
ret <4 x float> %V3
}
<4 x float> %test_3774(<4 x float> *%P1, <4 x float> *%P2) {
%V1 = load <4 x float> *%P1
%V2 = load <4 x float> *%P2
%V3 = shufflevector <4 x float> %V1, <4 x float> %V2,
<4 x uint> <uint 3, uint 7, uint 7, uint 4>
define <4 x float> @test_3774(<4 x float>* %P1, <4 x float>* %P2) {
%V1 = load <4 x float>* %P1 ; <<4 x float>> [#uses=1]
%V2 = load <4 x float>* %P2 ; <<4 x float>> [#uses=1]
%V3 = shufflevector <4 x float> %V1, <4 x float> %V2, <4 x i32> < i32 3, i32 7, i32 7, i32 4 > ; <<4 x float>> [#uses=1]
ret <4 x float> %V3
}
<4 x float> %test_4450(<4 x float> *%P1, <4 x float> *%P2) {
%V1 = load <4 x float> *%P1
%V2 = load <4 x float> *%P2
%V3 = shufflevector <4 x float> %V1, <4 x float> %V2,
<4 x uint> <uint 4, uint 4, uint 5, uint 0>
define <4 x float> @test_4450(<4 x float>* %P1, <4 x float>* %P2) {
%V1 = load <4 x float>* %P1 ; <<4 x float>> [#uses=1]
%V2 = load <4 x float>* %P2 ; <<4 x float>> [#uses=1]
%V3 = shufflevector <4 x float> %V1, <4 x float> %V2, <4 x i32> < i32 4, i32 4, i32 5, i32 0 > ; <<4 x float>> [#uses=1]
ret <4 x float> %V3
}

View File

@ -1,506 +1,504 @@
; RUN: llvm-upgrade < %s | llvm-as | opt -instcombine | \
; RUN: llvm-as < %s | opt -instcombine | \
; RUN: llc -march=ppc32 -mcpu=g5 | not grep vperm
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 > %t
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 > %t
; RUN: grep vsldoi %t | count 2
; RUN: grep vmrgh %t | count 7
; RUN: grep vmrgl %t | count 6
; RUN: grep vpkuhum %t | count 1
; RUN: grep vpkuwum %t | count 1
void %VSLDOI_xy(<8 x short>* %A, <8 x short>* %B) {
define void @VSLDOI_xy(<8 x i16>* %A, <8 x i16>* %B) {
entry:
%tmp = load <8 x short>* %A ; <<8 x short>> [#uses=1]
%tmp2 = load <8 x short>* %B ; <<8 x short>> [#uses=1]
%tmp = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=11]
%tmp2 = cast <8 x short> %tmp2 to <16 x sbyte> ; <<16 x sbyte>> [#uses=5]
%tmp = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
%tmp3 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1]
%tmp4 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
%tmp5 = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1]
%tmp6 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
%tmp7 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1]
%tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
%tmp9 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1]
%tmp10 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
%tmp11 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1]
%tmp12 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
%tmp13 = extractelement <16 x sbyte> %tmp2, uint 0 ; <sbyte> [#uses=1]
%tmp14 = extractelement <16 x sbyte> %tmp2, uint 1 ; <sbyte> [#uses=1]
%tmp15 = extractelement <16 x sbyte> %tmp2, uint 2 ; <sbyte> [#uses=1]
%tmp16 = extractelement <16 x sbyte> %tmp2, uint 3 ; <sbyte> [#uses=1]
%tmp17 = extractelement <16 x sbyte> %tmp2, uint 4 ; <sbyte> [#uses=1]
%tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
%tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
%tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
%tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
%tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
%tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
%tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
%tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
%tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
%tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
%tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
%tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
%tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
%tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
%tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
%tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
%tmp33 = cast <16 x sbyte> %tmp33 to <8 x short> ; <<8 x short>> [#uses=1]
store <8 x short> %tmp33, <8 x short>* %A
%tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=1]
%tmp2 = load <8 x i16>* %B ; <<8 x i16>> [#uses=1]
%tmp.upgrd.1 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=11]
%tmp2.upgrd.2 = bitcast <8 x i16> %tmp2 to <16 x i8> ; <<16 x i8>> [#uses=5]
%tmp.upgrd.3 = extractelement <16 x i8> %tmp.upgrd.1, i32 5 ; <i8> [#uses=1]
%tmp3 = extractelement <16 x i8> %tmp.upgrd.1, i32 6 ; <i8> [#uses=1]
%tmp4 = extractelement <16 x i8> %tmp.upgrd.1, i32 7 ; <i8> [#uses=1]
%tmp5 = extractelement <16 x i8> %tmp.upgrd.1, i32 8 ; <i8> [#uses=1]
%tmp6 = extractelement <16 x i8> %tmp.upgrd.1, i32 9 ; <i8> [#uses=1]
%tmp7 = extractelement <16 x i8> %tmp.upgrd.1, i32 10 ; <i8> [#uses=1]
%tmp8 = extractelement <16 x i8> %tmp.upgrd.1, i32 11 ; <i8> [#uses=1]
%tmp9 = extractelement <16 x i8> %tmp.upgrd.1, i32 12 ; <i8> [#uses=1]
%tmp10 = extractelement <16 x i8> %tmp.upgrd.1, i32 13 ; <i8> [#uses=1]
%tmp11 = extractelement <16 x i8> %tmp.upgrd.1, i32 14 ; <i8> [#uses=1]
%tmp12 = extractelement <16 x i8> %tmp.upgrd.1, i32 15 ; <i8> [#uses=1]
%tmp13 = extractelement <16 x i8> %tmp2.upgrd.2, i32 0 ; <i8> [#uses=1]
%tmp14 = extractelement <16 x i8> %tmp2.upgrd.2, i32 1 ; <i8> [#uses=1]
%tmp15 = extractelement <16 x i8> %tmp2.upgrd.2, i32 2 ; <i8> [#uses=1]
%tmp16 = extractelement <16 x i8> %tmp2.upgrd.2, i32 3 ; <i8> [#uses=1]
%tmp17 = extractelement <16 x i8> %tmp2.upgrd.2, i32 4 ; <i8> [#uses=1]
%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.3, i32 0 ; <<16 x i8>> [#uses=1]
%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1]
%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1]
%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1]
%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1]
%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1]
%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1]
%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1]
%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1]
%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1]
%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1]
%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1]
%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1]
%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1]
%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1]
%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1]
%tmp33.upgrd.4 = bitcast <16 x i8> %tmp33 to <8 x i16> ; <<8 x i16>> [#uses=1]
store <8 x i16> %tmp33.upgrd.4, <8 x i16>* %A
ret void
}
; VSLDOI_xx: both shuffle operands come from the same load of %A.  The
; element-wise extract/insert sequence selects bytes 5..15 of the first
; value followed by bytes 0..4 of the second, i.e. a vsldoi-by-5 pattern
; that codegen should match to a single vsldoi instruction.
define void @VSLDOI_xx(<8 x i16>* %A, <8 x i16>* %B) {
	%tmp = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %A
	%tmp.upgrd.5 = bitcast <8 x i16> %tmp to <16 x i8>
	%tmp2.upgrd.6 = bitcast <8 x i16> %tmp2 to <16 x i8>
	%tmp.upgrd.7 = extractelement <16 x i8> %tmp.upgrd.5, i32 5
	%tmp3 = extractelement <16 x i8> %tmp.upgrd.5, i32 6
	%tmp4 = extractelement <16 x i8> %tmp.upgrd.5, i32 7
	%tmp5 = extractelement <16 x i8> %tmp.upgrd.5, i32 8
	%tmp6 = extractelement <16 x i8> %tmp.upgrd.5, i32 9
	%tmp7 = extractelement <16 x i8> %tmp.upgrd.5, i32 10
	%tmp8 = extractelement <16 x i8> %tmp.upgrd.5, i32 11
	%tmp9 = extractelement <16 x i8> %tmp.upgrd.5, i32 12
	%tmp10 = extractelement <16 x i8> %tmp.upgrd.5, i32 13
	%tmp11 = extractelement <16 x i8> %tmp.upgrd.5, i32 14
	%tmp12 = extractelement <16 x i8> %tmp.upgrd.5, i32 15
	%tmp13 = extractelement <16 x i8> %tmp2.upgrd.6, i32 0
	%tmp14 = extractelement <16 x i8> %tmp2.upgrd.6, i32 1
	%tmp15 = extractelement <16 x i8> %tmp2.upgrd.6, i32 2
	%tmp16 = extractelement <16 x i8> %tmp2.upgrd.6, i32 3
	%tmp17 = extractelement <16 x i8> %tmp2.upgrd.6, i32 4
	%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.7, i32 0
	%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1
	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2
	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3
	%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4
	%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5
	%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6
	%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7
	%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8
	%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9
	%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10
	%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11
	%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12
	%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13
	%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14
	%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15
	%tmp33.upgrd.8 = bitcast <16 x i8> %tmp33 to <8 x i16>
	store <8 x i16> %tmp33.upgrd.8, <8 x i16>* %A
	ret void
}
; VPERM_promote: <8 x i16> operands are bitcast (promoted) to <4 x i32>
; before calling the Altivec vperm intrinsic with an all-14s control
; vector, then cast back for the store into %A.
define void @VPERM_promote(<8 x i16>* %A, <8 x i16>* %B) {
entry:
	%tmp = load <8 x i16>* %A
	%tmp.upgrd.9 = bitcast <8 x i16> %tmp to <4 x i32>
	%tmp2 = load <8 x i16>* %B
	%tmp2.upgrd.10 = bitcast <8 x i16> %tmp2 to <4 x i32>
	%tmp3 = call <4 x i32> @llvm.ppc.altivec.vperm( <4 x i32> %tmp.upgrd.9, <4 x i32> %tmp2.upgrd.10, <16 x i8> < i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14 > )
	%tmp3.upgrd.11 = bitcast <4 x i32> %tmp3 to <8 x i16>
	store <8 x i16> %tmp3.upgrd.11, <8 x i16>* %A
	ret void
}
; Altivec vperm intrinsic: permutes bytes of the two input vectors
; according to the third (control) operand.
declare <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32>, <4 x i32>, <16 x i8>)
; tb_l: interleaves the low halves (bytes 8..15) of %A and %B element by
; element -- the vmrglb merge-low-bytes pattern.
define void @tb_l(<16 x i8>* %A, <16 x i8>* %B) {
entry:
	%tmp = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp.upgrd.12 = extractelement <16 x i8> %tmp, i32 8
	%tmp3 = extractelement <16 x i8> %tmp2, i32 8
	%tmp4 = extractelement <16 x i8> %tmp, i32 9
	%tmp5 = extractelement <16 x i8> %tmp2, i32 9
	%tmp6 = extractelement <16 x i8> %tmp, i32 10
	%tmp7 = extractelement <16 x i8> %tmp2, i32 10
	%tmp8 = extractelement <16 x i8> %tmp, i32 11
	%tmp9 = extractelement <16 x i8> %tmp2, i32 11
	%tmp10 = extractelement <16 x i8> %tmp, i32 12
	%tmp11 = extractelement <16 x i8> %tmp2, i32 12
	%tmp12 = extractelement <16 x i8> %tmp, i32 13
	%tmp13 = extractelement <16 x i8> %tmp2, i32 13
	%tmp14 = extractelement <16 x i8> %tmp, i32 14
	%tmp15 = extractelement <16 x i8> %tmp2, i32 14
	%tmp16 = extractelement <16 x i8> %tmp, i32 15
	%tmp17 = extractelement <16 x i8> %tmp2, i32 15
	%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.12, i32 0
	%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1
	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2
	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3
	%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4
	%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5
	%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6
	%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7
	%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8
	%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9
	%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10
	%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11
	%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12
	%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13
	%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14
	%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15
	store <16 x i8> %tmp33, <16 x i8>* %A
	ret void
}
; th_l: interleaves the low halves (halfwords 4..7) of %A and %B -- the
; vmrglh merge-low-halfwords pattern.
define void @th_l(<8 x i16>* %A, <8 x i16>* %B) {
entry:
	%tmp = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp.upgrd.13 = extractelement <8 x i16> %tmp, i32 4
	%tmp3 = extractelement <8 x i16> %tmp2, i32 4
	%tmp4 = extractelement <8 x i16> %tmp, i32 5
	%tmp5 = extractelement <8 x i16> %tmp2, i32 5
	%tmp6 = extractelement <8 x i16> %tmp, i32 6
	%tmp7 = extractelement <8 x i16> %tmp2, i32 6
	%tmp8 = extractelement <8 x i16> %tmp, i32 7
	%tmp9 = extractelement <8 x i16> %tmp2, i32 7
	%tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.13, i32 0
	%tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1
	%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2
	%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3
	%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4
	%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5
	%tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6
	%tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7
	store <8 x i16> %tmp17, <8 x i16>* %A
	ret void
}
; tw_l: interleaves the low halves (words 2..3) of %A and %B -- the
; vmrglw merge-low-words pattern.
define void @tw_l(<4 x i32>* %A, <4 x i32>* %B) {
entry:
	%tmp = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp.upgrd.14 = extractelement <4 x i32> %tmp, i32 2
	%tmp3 = extractelement <4 x i32> %tmp2, i32 2
	%tmp4 = extractelement <4 x i32> %tmp, i32 3
	%tmp5 = extractelement <4 x i32> %tmp2, i32 3
	%tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.14, i32 0
	%tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1
	%tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2
	%tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3
	store <4 x i32> %tmp9, <4 x i32>* %A
	ret void
}
; tb_h: interleaves the high halves (bytes 0..7) of %A and %B -- the
; vmrghb merge-high-bytes pattern.
define void @tb_h(<16 x i8>* %A, <16 x i8>* %B) {
entry:
	%tmp = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp.upgrd.15 = extractelement <16 x i8> %tmp, i32 0
	%tmp3 = extractelement <16 x i8> %tmp2, i32 0
	%tmp4 = extractelement <16 x i8> %tmp, i32 1
	%tmp5 = extractelement <16 x i8> %tmp2, i32 1
	%tmp6 = extractelement <16 x i8> %tmp, i32 2
	%tmp7 = extractelement <16 x i8> %tmp2, i32 2
	%tmp8 = extractelement <16 x i8> %tmp, i32 3
	%tmp9 = extractelement <16 x i8> %tmp2, i32 3
	%tmp10 = extractelement <16 x i8> %tmp, i32 4
	%tmp11 = extractelement <16 x i8> %tmp2, i32 4
	%tmp12 = extractelement <16 x i8> %tmp, i32 5
	%tmp13 = extractelement <16 x i8> %tmp2, i32 5
	%tmp14 = extractelement <16 x i8> %tmp, i32 6
	%tmp15 = extractelement <16 x i8> %tmp2, i32 6
	%tmp16 = extractelement <16 x i8> %tmp, i32 7
	%tmp17 = extractelement <16 x i8> %tmp2, i32 7
	%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.15, i32 0
	%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1
	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2
	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3
	%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4
	%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5
	%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6
	%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7
	%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8
	%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9
	%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10
	%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11
	%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12
	%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13
	%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14
	%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15
	store <16 x i8> %tmp33, <16 x i8>* %A
	ret void
}
; th_h: interleaves the high halves (halfwords 0..3) of %A and %B -- the
; vmrghh merge-high-halfwords pattern.
define void @th_h(<8 x i16>* %A, <8 x i16>* %B) {
entry:
	%tmp = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp.upgrd.16 = extractelement <8 x i16> %tmp, i32 0
	%tmp3 = extractelement <8 x i16> %tmp2, i32 0
	%tmp4 = extractelement <8 x i16> %tmp, i32 1
	%tmp5 = extractelement <8 x i16> %tmp2, i32 1
	%tmp6 = extractelement <8 x i16> %tmp, i32 2
	%tmp7 = extractelement <8 x i16> %tmp2, i32 2
	%tmp8 = extractelement <8 x i16> %tmp, i32 3
	%tmp9 = extractelement <8 x i16> %tmp2, i32 3
	%tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.16, i32 0
	%tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1
	%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2
	%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3
	%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4
	%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5
	%tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6
	%tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7
	store <8 x i16> %tmp17, <8 x i16>* %A
	ret void
}
; tw_h: interleaves the high words of %B and %A (note: %B's element comes
; first in each pair) -- a merge-high-words pattern with swapped operands.
define void @tw_h(<4 x i32>* %A, <4 x i32>* %B) {
entry:
	%tmp = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp.upgrd.17 = extractelement <4 x i32> %tmp2, i32 0
	%tmp3 = extractelement <4 x i32> %tmp, i32 0
	%tmp4 = extractelement <4 x i32> %tmp2, i32 1
	%tmp5 = extractelement <4 x i32> %tmp, i32 1
	%tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.17, i32 0
	%tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1
	%tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2
	%tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3
	store <4 x i32> %tmp9, <4 x i32>* %A
	ret void
}
; tw_h_flop: same merge-high-words pattern as tw_h but with %A's element
; first in each pair (operand order flipped).
define void @tw_h_flop(<4 x i32>* %A, <4 x i32>* %B) {
	%tmp = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp.upgrd.18 = extractelement <4 x i32> %tmp, i32 0
	%tmp3 = extractelement <4 x i32> %tmp2, i32 0
	%tmp4 = extractelement <4 x i32> %tmp, i32 1
	%tmp5 = extractelement <4 x i32> %tmp2, i32 1
	%tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.18, i32 0
	%tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1
	%tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2
	%tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3
	store <4 x i32> %tmp9, <4 x i32>* %A
	ret void
}
; VMRG_UNARY_tb_l: unary merge-low-bytes -- both shuffle inputs are the
; same load of %A, so each low byte (8..15) is duplicated into a pair.
define void @VMRG_UNARY_tb_l(<16 x i8>* %A, <16 x i8>* %B) {
entry:
	%tmp = load <16 x i8>* %A
	%tmp.upgrd.19 = extractelement <16 x i8> %tmp, i32 8
	%tmp3 = extractelement <16 x i8> %tmp, i32 8
	%tmp4 = extractelement <16 x i8> %tmp, i32 9
	%tmp5 = extractelement <16 x i8> %tmp, i32 9
	%tmp6 = extractelement <16 x i8> %tmp, i32 10
	%tmp7 = extractelement <16 x i8> %tmp, i32 10
	%tmp8 = extractelement <16 x i8> %tmp, i32 11
	%tmp9 = extractelement <16 x i8> %tmp, i32 11
	%tmp10 = extractelement <16 x i8> %tmp, i32 12
	%tmp11 = extractelement <16 x i8> %tmp, i32 12
	%tmp12 = extractelement <16 x i8> %tmp, i32 13
	%tmp13 = extractelement <16 x i8> %tmp, i32 13
	%tmp14 = extractelement <16 x i8> %tmp, i32 14
	%tmp15 = extractelement <16 x i8> %tmp, i32 14
	%tmp16 = extractelement <16 x i8> %tmp, i32 15
	%tmp17 = extractelement <16 x i8> %tmp, i32 15
	%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.19, i32 0
	%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1
	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2
	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3
	%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4
	%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5
	%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6
	%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7
	%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8
	%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9
	%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10
	%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11
	%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12
	%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13
	%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14
	%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15
	store <16 x i8> %tmp33, <16 x i8>* %A
	ret void
}
void %VMRG_UNARY_th_l(<8 x short>* %A, <8 x short>* %B) {
define void @VMRG_UNARY_th_l(<8 x i16>* %A, <8 x i16>* %B) {
entry:
%tmp = load <8 x short>* %A ; <<8 x short>> [#uses=8]
%tmp = extractelement <8 x short> %tmp, uint 4 ; <short> [#uses=1]
%tmp3 = extractelement <8 x short> %tmp, uint 4 ; <short> [#uses=1]
%tmp4 = extractelement <8 x short> %tmp, uint 5 ; <short> [#uses=1]
%tmp5 = extractelement <8 x short> %tmp, uint 5 ; <short> [#uses=1]
%tmp6 = extractelement <8 x short> %tmp, uint 6 ; <short> [#uses=1]
%tmp7 = extractelement <8 x short> %tmp, uint 6 ; <short> [#uses=1]
%tmp8 = extractelement <8 x short> %tmp, uint 7 ; <short> [#uses=1]
%tmp9 = extractelement <8 x short> %tmp, uint 7 ; <short> [#uses=1]
%tmp10 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1]
%tmp11 = insertelement <8 x short> %tmp10, short %tmp3, uint 1 ; <<8 x short>> [#uses=1]
%tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 2 ; <<8 x short>> [#uses=1]
%tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 3 ; <<8 x short>> [#uses=1]
%tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 4 ; <<8 x short>> [#uses=1]
%tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 5 ; <<8 x short>> [#uses=1]
%tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 6 ; <<8 x short>> [#uses=1]
%tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 7 ; <<8 x short>> [#uses=1]
store <8 x short> %tmp17, <8 x short>* %A
%tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=8]
%tmp.upgrd.20 = extractelement <8 x i16> %tmp, i32 4 ; <i16> [#uses=1]
%tmp3 = extractelement <8 x i16> %tmp, i32 4 ; <i16> [#uses=1]
%tmp4 = extractelement <8 x i16> %tmp, i32 5 ; <i16> [#uses=1]
%tmp5 = extractelement <8 x i16> %tmp, i32 5 ; <i16> [#uses=1]
%tmp6 = extractelement <8 x i16> %tmp, i32 6 ; <i16> [#uses=1]
%tmp7 = extractelement <8 x i16> %tmp, i32 6 ; <i16> [#uses=1]
%tmp8 = extractelement <8 x i16> %tmp, i32 7 ; <i16> [#uses=1]
%tmp9 = extractelement <8 x i16> %tmp, i32 7 ; <i16> [#uses=1]
%tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.20, i32 0 ; <<8 x i16>> [#uses=1]
%tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1 ; <<8 x i16>> [#uses=1]
%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2 ; <<8 x i16>> [#uses=1]
%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3 ; <<8 x i16>> [#uses=1]
%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4 ; <<8 x i16>> [#uses=1]
%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5 ; <<8 x i16>> [#uses=1]
%tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6 ; <<8 x i16>> [#uses=1]
%tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7 ; <<8 x i16>> [#uses=1]
store <8 x i16> %tmp17, <8 x i16>* %A
ret void
}
void %VMRG_UNARY_tw_l(<4 x int>* %A, <4 x int>* %B) {
define void @VMRG_UNARY_tw_l(<4 x i32>* %A, <4 x i32>* %B) {
entry:
%tmp = load <4 x int>* %A ; <<4 x int>> [#uses=4]
%tmp = extractelement <4 x int> %tmp, uint 2 ; <int> [#uses=1]
%tmp3 = extractelement <4 x int> %tmp, uint 2 ; <int> [#uses=1]
%tmp4 = extractelement <4 x int> %tmp, uint 3 ; <int> [#uses=1]
%tmp5 = extractelement <4 x int> %tmp, uint 3 ; <int> [#uses=1]
%tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1]
%tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1]
%tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1]
%tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1]
store <4 x int> %tmp9, <4 x int>* %A
%tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=4]
%tmp.upgrd.21 = extractelement <4 x i32> %tmp, i32 2 ; <i32> [#uses=1]
%tmp3 = extractelement <4 x i32> %tmp, i32 2 ; <i32> [#uses=1]
%tmp4 = extractelement <4 x i32> %tmp, i32 3 ; <i32> [#uses=1]
%tmp5 = extractelement <4 x i32> %tmp, i32 3 ; <i32> [#uses=1]
%tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.21, i32 0 ; <<4 x i32>> [#uses=1]
%tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
%tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1]
%tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1]
store <4 x i32> %tmp9, <4 x i32>* %A
ret void
}
void %VMRG_UNARY_tb_h(<16 x sbyte>* %A, <16 x sbyte>* %B) {
define void @VMRG_UNARY_tb_h(<16 x i8>* %A, <16 x i8>* %B) {
entry:
%tmp = load <16 x sbyte>* %A ; <<16 x sbyte>> [#uses=16]
%tmp = extractelement <16 x sbyte> %tmp, uint 0 ; <sbyte> [#uses=1]
%tmp3 = extractelement <16 x sbyte> %tmp, uint 0 ; <sbyte> [#uses=1]
%tmp4 = extractelement <16 x sbyte> %tmp, uint 1 ; <sbyte> [#uses=1]
%tmp5 = extractelement <16 x sbyte> %tmp, uint 1 ; <sbyte> [#uses=1]
%tmp6 = extractelement <16 x sbyte> %tmp, uint 2 ; <sbyte> [#uses=1]
%tmp7 = extractelement <16 x sbyte> %tmp, uint 2 ; <sbyte> [#uses=1]
%tmp8 = extractelement <16 x sbyte> %tmp, uint 3 ; <sbyte> [#uses=1]
%tmp9 = extractelement <16 x sbyte> %tmp, uint 3 ; <sbyte> [#uses=1]
%tmp10 = extractelement <16 x sbyte> %tmp, uint 4 ; <sbyte> [#uses=1]
%tmp11 = extractelement <16 x sbyte> %tmp, uint 4 ; <sbyte> [#uses=1]
%tmp12 = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
%tmp13 = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
%tmp14 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1]
%tmp15 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1]
%tmp16 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
%tmp17 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
%tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
%tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
%tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
%tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
%tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
%tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
%tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
%tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
%tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
%tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
%tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
%tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
%tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
%tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
%tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
%tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
store <16 x sbyte> %tmp33, <16 x sbyte>* %A
%tmp = load <16 x i8>* %A ; <<16 x i8>> [#uses=16]
%tmp.upgrd.22 = extractelement <16 x i8> %tmp, i32 0 ; <i8> [#uses=1]
%tmp3 = extractelement <16 x i8> %tmp, i32 0 ; <i8> [#uses=1]
%tmp4 = extractelement <16 x i8> %tmp, i32 1 ; <i8> [#uses=1]
%tmp5 = extractelement <16 x i8> %tmp, i32 1 ; <i8> [#uses=1]
%tmp6 = extractelement <16 x i8> %tmp, i32 2 ; <i8> [#uses=1]
%tmp7 = extractelement <16 x i8> %tmp, i32 2 ; <i8> [#uses=1]
%tmp8 = extractelement <16 x i8> %tmp, i32 3 ; <i8> [#uses=1]
%tmp9 = extractelement <16 x i8> %tmp, i32 3 ; <i8> [#uses=1]
%tmp10 = extractelement <16 x i8> %tmp, i32 4 ; <i8> [#uses=1]
%tmp11 = extractelement <16 x i8> %tmp, i32 4 ; <i8> [#uses=1]
%tmp12 = extractelement <16 x i8> %tmp, i32 5 ; <i8> [#uses=1]
%tmp13 = extractelement <16 x i8> %tmp, i32 5 ; <i8> [#uses=1]
%tmp14 = extractelement <16 x i8> %tmp, i32 6 ; <i8> [#uses=1]
%tmp15 = extractelement <16 x i8> %tmp, i32 6 ; <i8> [#uses=1]
%tmp16 = extractelement <16 x i8> %tmp, i32 7 ; <i8> [#uses=1]
%tmp17 = extractelement <16 x i8> %tmp, i32 7 ; <i8> [#uses=1]
%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.22, i32 0 ; <<16 x i8>> [#uses=1]
%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1]
%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1]
%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1]
%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1]
%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1]
%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1]
%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1]
%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1]
%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1]
%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1]
%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1]
%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1]
%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1]
%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1]
%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1]
store <16 x i8> %tmp33, <16 x i8>* %A
ret void
}
void %VMRG_UNARY_th_h(<8 x short>* %A, <8 x short>* %B) {
define void @VMRG_UNARY_th_h(<8 x i16>* %A, <8 x i16>* %B) {
entry:
%tmp = load <8 x short>* %A ; <<8 x short>> [#uses=8]
%tmp = extractelement <8 x short> %tmp, uint 0 ; <short> [#uses=1]
%tmp3 = extractelement <8 x short> %tmp, uint 0 ; <short> [#uses=1]
%tmp4 = extractelement <8 x short> %tmp, uint 1 ; <short> [#uses=1]
%tmp5 = extractelement <8 x short> %tmp, uint 1 ; <short> [#uses=1]
%tmp6 = extractelement <8 x short> %tmp, uint 2 ; <short> [#uses=1]
%tmp7 = extractelement <8 x short> %tmp, uint 2 ; <short> [#uses=1]
%tmp8 = extractelement <8 x short> %tmp, uint 3 ; <short> [#uses=1]
%tmp9 = extractelement <8 x short> %tmp, uint 3 ; <short> [#uses=1]
%tmp10 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1]
%tmp11 = insertelement <8 x short> %tmp10, short %tmp3, uint 1 ; <<8 x short>> [#uses=1]
%tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 2 ; <<8 x short>> [#uses=1]
%tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 3 ; <<8 x short>> [#uses=1]
%tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 4 ; <<8 x short>> [#uses=1]
%tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 5 ; <<8 x short>> [#uses=1]
%tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 6 ; <<8 x short>> [#uses=1]
%tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 7 ; <<8 x short>> [#uses=1]
store <8 x short> %tmp17, <8 x short>* %A
%tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=8]
%tmp.upgrd.23 = extractelement <8 x i16> %tmp, i32 0 ; <i16> [#uses=1]
%tmp3 = extractelement <8 x i16> %tmp, i32 0 ; <i16> [#uses=1]
%tmp4 = extractelement <8 x i16> %tmp, i32 1 ; <i16> [#uses=1]
%tmp5 = extractelement <8 x i16> %tmp, i32 1 ; <i16> [#uses=1]
%tmp6 = extractelement <8 x i16> %tmp, i32 2 ; <i16> [#uses=1]
%tmp7 = extractelement <8 x i16> %tmp, i32 2 ; <i16> [#uses=1]
%tmp8 = extractelement <8 x i16> %tmp, i32 3 ; <i16> [#uses=1]
%tmp9 = extractelement <8 x i16> %tmp, i32 3 ; <i16> [#uses=1]
%tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.23, i32 0 ; <<8 x i16>> [#uses=1]
%tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1 ; <<8 x i16>> [#uses=1]
%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2 ; <<8 x i16>> [#uses=1]
%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3 ; <<8 x i16>> [#uses=1]
%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4 ; <<8 x i16>> [#uses=1]
%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5 ; <<8 x i16>> [#uses=1]
%tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6 ; <<8 x i16>> [#uses=1]
%tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7 ; <<8 x i16>> [#uses=1]
store <8 x i16> %tmp17, <8 x i16>* %A
ret void
}
void %VMRG_UNARY_tw_h(<4 x int>* %A, <4 x int>* %B) {
define void @VMRG_UNARY_tw_h(<4 x i32>* %A, <4 x i32>* %B) {
entry:
%tmp = load <4 x int>* %A ; <<4 x int>> [#uses=4]
%tmp = extractelement <4 x int> %tmp, uint 0 ; <int> [#uses=1]
%tmp3 = extractelement <4 x int> %tmp, uint 0 ; <int> [#uses=1]
%tmp4 = extractelement <4 x int> %tmp, uint 1 ; <int> [#uses=1]
%tmp5 = extractelement <4 x int> %tmp, uint 1 ; <int> [#uses=1]
%tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1]
%tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1]
%tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1]
%tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1]
store <4 x int> %tmp9, <4 x int>* %A
%tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=4]
%tmp.upgrd.24 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1]
%tmp3 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1]
%tmp4 = extractelement <4 x i32> %tmp, i32 1 ; <i32> [#uses=1]
%tmp5 = extractelement <4 x i32> %tmp, i32 1 ; <i32> [#uses=1]
%tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.24, i32 0 ; <<4 x i32>> [#uses=1]
%tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
%tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1]
%tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1]
store <4 x i32> %tmp9, <4 x i32>* %A
ret void
}
void %VPCKUHUM_unary(<8 x short>* %A, <8 x short>* %B) {
define void @VPCKUHUM_unary(<8 x i16>* %A, <8 x i16>* %B) {
entry:
%tmp = load <8 x short>* %A ; <<8 x short>> [#uses=2]
%tmp = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=8]
%tmp3 = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=8]
%tmp = extractelement <16 x sbyte> %tmp, uint 1 ; <sbyte> [#uses=1]
%tmp4 = extractelement <16 x sbyte> %tmp, uint 3 ; <sbyte> [#uses=1]
%tmp5 = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
%tmp6 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
%tmp7 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
%tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
%tmp9 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
%tmp10 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
%tmp11 = extractelement <16 x sbyte> %tmp3, uint 1 ; <sbyte> [#uses=1]
%tmp12 = extractelement <16 x sbyte> %tmp3, uint 3 ; <sbyte> [#uses=1]
%tmp13 = extractelement <16 x sbyte> %tmp3, uint 5 ; <sbyte> [#uses=1]
%tmp14 = extractelement <16 x sbyte> %tmp3, uint 7 ; <sbyte> [#uses=1]
%tmp15 = extractelement <16 x sbyte> %tmp3, uint 9 ; <sbyte> [#uses=1]
%tmp16 = extractelement <16 x sbyte> %tmp3, uint 11 ; <sbyte> [#uses=1]
%tmp17 = extractelement <16 x sbyte> %tmp3, uint 13 ; <sbyte> [#uses=1]
%tmp18 = extractelement <16 x sbyte> %tmp3, uint 15 ; <sbyte> [#uses=1]
%tmp19 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
%tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 1 ; <<16 x sbyte>> [#uses=1]
%tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 2 ; <<16 x sbyte>> [#uses=1]
%tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 3 ; <<16 x sbyte>> [#uses=1]
%tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 4 ; <<16 x sbyte>> [#uses=1]
%tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 5 ; <<16 x sbyte>> [#uses=1]
%tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 6 ; <<16 x sbyte>> [#uses=1]
%tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 7 ; <<16 x sbyte>> [#uses=1]
%tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 8 ; <<16 x sbyte>> [#uses=1]
%tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 9 ; <<16 x sbyte>> [#uses=1]
%tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 10 ; <<16 x sbyte>> [#uses=1]
%tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 11 ; <<16 x sbyte>> [#uses=1]
%tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 12 ; <<16 x sbyte>> [#uses=1]
%tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 13 ; <<16 x sbyte>> [#uses=1]
%tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 14 ; <<16 x sbyte>> [#uses=1]
%tmp34 = insertelement <16 x sbyte> %tmp33, sbyte %tmp18, uint 15 ; <<16 x sbyte>> [#uses=1]
%tmp34 = cast <16 x sbyte> %tmp34 to <8 x short> ; <<8 x short>> [#uses=1]
store <8 x short> %tmp34, <8 x short>* %A
%tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=2]
%tmp.upgrd.25 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=8]
%tmp3 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=8]
%tmp.upgrd.26 = extractelement <16 x i8> %tmp.upgrd.25, i32 1 ; <i8> [#uses=1]
%tmp4 = extractelement <16 x i8> %tmp.upgrd.25, i32 3 ; <i8> [#uses=1]
%tmp5 = extractelement <16 x i8> %tmp.upgrd.25, i32 5 ; <i8> [#uses=1]
%tmp6 = extractelement <16 x i8> %tmp.upgrd.25, i32 7 ; <i8> [#uses=1]
%tmp7 = extractelement <16 x i8> %tmp.upgrd.25, i32 9 ; <i8> [#uses=1]
%tmp8 = extractelement <16 x i8> %tmp.upgrd.25, i32 11 ; <i8> [#uses=1]
%tmp9 = extractelement <16 x i8> %tmp.upgrd.25, i32 13 ; <i8> [#uses=1]
%tmp10 = extractelement <16 x i8> %tmp.upgrd.25, i32 15 ; <i8> [#uses=1]
%tmp11 = extractelement <16 x i8> %tmp3, i32 1 ; <i8> [#uses=1]
%tmp12 = extractelement <16 x i8> %tmp3, i32 3 ; <i8> [#uses=1]
%tmp13 = extractelement <16 x i8> %tmp3, i32 5 ; <i8> [#uses=1]
%tmp14 = extractelement <16 x i8> %tmp3, i32 7 ; <i8> [#uses=1]
%tmp15 = extractelement <16 x i8> %tmp3, i32 9 ; <i8> [#uses=1]
%tmp16 = extractelement <16 x i8> %tmp3, i32 11 ; <i8> [#uses=1]
%tmp17 = extractelement <16 x i8> %tmp3, i32 13 ; <i8> [#uses=1]
%tmp18 = extractelement <16 x i8> %tmp3, i32 15 ; <i8> [#uses=1]
%tmp19 = insertelement <16 x i8> undef, i8 %tmp.upgrd.26, i32 0 ; <<16 x i8>> [#uses=1]
%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 1 ; <<16 x i8>> [#uses=1]
%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 2 ; <<16 x i8>> [#uses=1]
%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 3 ; <<16 x i8>> [#uses=1]
%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 4 ; <<16 x i8>> [#uses=1]
%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 5 ; <<16 x i8>> [#uses=1]
%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 6 ; <<16 x i8>> [#uses=1]
%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 7 ; <<16 x i8>> [#uses=1]
%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 8 ; <<16 x i8>> [#uses=1]
%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 9 ; <<16 x i8>> [#uses=1]
%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 10 ; <<16 x i8>> [#uses=1]
%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 11 ; <<16 x i8>> [#uses=1]
%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 12 ; <<16 x i8>> [#uses=1]
%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 13 ; <<16 x i8>> [#uses=1]
%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 14 ; <<16 x i8>> [#uses=1]
%tmp34 = insertelement <16 x i8> %tmp33, i8 %tmp18, i32 15 ; <<16 x i8>> [#uses=1]
%tmp34.upgrd.27 = bitcast <16 x i8> %tmp34 to <8 x i16> ; <<8 x i16>> [#uses=1]
store <8 x i16> %tmp34.upgrd.27, <8 x i16>* %A
ret void
}
void %VPCKUWUM_unary(<4 x int>* %A, <4 x int>* %B) {
define void @VPCKUWUM_unary(<4 x i32>* %A, <4 x i32>* %B) {
entry:
%tmp = load <4 x int>* %A ; <<4 x int>> [#uses=2]
%tmp = cast <4 x int> %tmp to <8 x short> ; <<8 x short>> [#uses=4]
%tmp3 = cast <4 x int> %tmp to <8 x short> ; <<8 x short>> [#uses=4]
%tmp = extractelement <8 x short> %tmp, uint 1 ; <short> [#uses=1]
%tmp4 = extractelement <8 x short> %tmp, uint 3 ; <short> [#uses=1]
%tmp5 = extractelement <8 x short> %tmp, uint 5 ; <short> [#uses=1]
%tmp6 = extractelement <8 x short> %tmp, uint 7 ; <short> [#uses=1]
%tmp7 = extractelement <8 x short> %tmp3, uint 1 ; <short> [#uses=1]
%tmp8 = extractelement <8 x short> %tmp3, uint 3 ; <short> [#uses=1]
%tmp9 = extractelement <8 x short> %tmp3, uint 5 ; <short> [#uses=1]
%tmp10 = extractelement <8 x short> %tmp3, uint 7 ; <short> [#uses=1]
%tmp11 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1]
%tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 1 ; <<8 x short>> [#uses=1]
%tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 2 ; <<8 x short>> [#uses=1]
%tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 3 ; <<8 x short>> [#uses=1]
%tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 4 ; <<8 x short>> [#uses=1]
%tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 5 ; <<8 x short>> [#uses=1]
%tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 6 ; <<8 x short>> [#uses=1]
%tmp18 = insertelement <8 x short> %tmp17, short %tmp10, uint 7 ; <<8 x short>> [#uses=1]
%tmp18 = cast <8 x short> %tmp18 to <4 x int> ; <<4 x int>> [#uses=1]
store <4 x int> %tmp18, <4 x int>* %A
%tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=2]
%tmp.upgrd.28 = bitcast <4 x i32> %tmp to <8 x i16> ; <<8 x i16>> [#uses=4]
%tmp3 = bitcast <4 x i32> %tmp to <8 x i16> ; <<8 x i16>> [#uses=4]
%tmp.upgrd.29 = extractelement <8 x i16> %tmp.upgrd.28, i32 1 ; <i16> [#uses=1]
%tmp4 = extractelement <8 x i16> %tmp.upgrd.28, i32 3 ; <i16> [#uses=1]
%tmp5 = extractelement <8 x i16> %tmp.upgrd.28, i32 5 ; <i16> [#uses=1]
%tmp6 = extractelement <8 x i16> %tmp.upgrd.28, i32 7 ; <i16> [#uses=1]
%tmp7 = extractelement <8 x i16> %tmp3, i32 1 ; <i16> [#uses=1]
%tmp8 = extractelement <8 x i16> %tmp3, i32 3 ; <i16> [#uses=1]
%tmp9 = extractelement <8 x i16> %tmp3, i32 5 ; <i16> [#uses=1]
%tmp10 = extractelement <8 x i16> %tmp3, i32 7 ; <i16> [#uses=1]
%tmp11 = insertelement <8 x i16> undef, i16 %tmp.upgrd.29, i32 0 ; <<8 x i16>> [#uses=1]
%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 1 ; <<8 x i16>> [#uses=1]
%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 2 ; <<8 x i16>> [#uses=1]
%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 3 ; <<8 x i16>> [#uses=1]
%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 4 ; <<8 x i16>> [#uses=1]
%tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 5 ; <<8 x i16>> [#uses=1]
%tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 6 ; <<8 x i16>> [#uses=1]
%tmp18 = insertelement <8 x i16> %tmp17, i16 %tmp10, i32 7 ; <<8 x i16>> [#uses=1]
%tmp18.upgrd.30 = bitcast <8 x i16> %tmp18 to <4 x i32> ; <<4 x i32>> [#uses=1]
store <4 x i32> %tmp18.upgrd.30, <4 x i32>* %A
ret void
}

View File

@ -1,73 +1,71 @@
; Test that vectors are scalarized/lowered correctly.
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g3 | \
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g3 | \
; RUN: grep stfs | count 4
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 -o %t -f
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 -o %t -f
; RUN: grep vspltw %t | count 2
; RUN: grep vsplti %t | count 3
; RUN: grep vsplth %t | count 1
%f4 = type <4 x float>
%i4 = type <4 x int>
%f4 = type <4 x float>
%i4 = type <4 x i32>
implementation
void %splat(%f4* %P, %f4* %Q, float %X) {
%tmp = insertelement %f4 undef, float %X, uint 0
%tmp2 = insertelement %f4 %tmp, float %X, uint 1
%tmp4 = insertelement %f4 %tmp2, float %X, uint 2
%tmp6 = insertelement %f4 %tmp4, float %X, uint 3
%q = load %f4* %Q
%R = add %f4 %q, %tmp6
define void @splat(%f4* %P, %f4* %Q, float %X) {
%tmp = insertelement %f4 undef, float %X, i32 0 ; <%f4> [#uses=1]
%tmp2 = insertelement %f4 %tmp, float %X, i32 1 ; <%f4> [#uses=1]
%tmp4 = insertelement %f4 %tmp2, float %X, i32 2 ; <%f4> [#uses=1]
%tmp6 = insertelement %f4 %tmp4, float %X, i32 3 ; <%f4> [#uses=1]
%q = load %f4* %Q ; <%f4> [#uses=1]
%R = add %f4 %q, %tmp6 ; <%f4> [#uses=1]
store %f4 %R, %f4* %P
ret void
}
void %splat_i4(%i4* %P, %i4* %Q, int %X) {
%tmp = insertelement %i4 undef, int %X, uint 0
%tmp2 = insertelement %i4 %tmp, int %X, uint 1
%tmp4 = insertelement %i4 %tmp2, int %X, uint 2
%tmp6 = insertelement %i4 %tmp4, int %X, uint 3
%q = load %i4* %Q
%R = add %i4 %q, %tmp6
define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) {
%tmp = insertelement %i4 undef, i32 %X, i32 0 ; <%i4> [#uses=1]
%tmp2 = insertelement %i4 %tmp, i32 %X, i32 1 ; <%i4> [#uses=1]
%tmp4 = insertelement %i4 %tmp2, i32 %X, i32 2 ; <%i4> [#uses=1]
%tmp6 = insertelement %i4 %tmp4, i32 %X, i32 3 ; <%i4> [#uses=1]
%q = load %i4* %Q ; <%i4> [#uses=1]
%R = add %i4 %q, %tmp6 ; <%i4> [#uses=1]
store %i4 %R, %i4* %P
ret void
}
void %splat_imm_i32(%i4* %P, %i4* %Q, int %X) {
%q = load %i4* %Q
%R = add %i4 %q, <int -1, int -1, int -1, int -1>
define void @splat_imm_i32(%i4* %P, %i4* %Q, i32 %X) {
%q = load %i4* %Q ; <%i4> [#uses=1]
%R = add %i4 %q, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <%i4> [#uses=1]
store %i4 %R, %i4* %P
ret void
}
void %splat_imm_i16(%i4* %P, %i4* %Q, int %X) {
%q = load %i4* %Q
%R = add %i4 %q, <int 65537, int 65537, int 65537, int 65537>
define void @splat_imm_i16(%i4* %P, %i4* %Q, i32 %X) {
%q = load %i4* %Q ; <%i4> [#uses=1]
%R = add %i4 %q, < i32 65537, i32 65537, i32 65537, i32 65537 > ; <%i4> [#uses=1]
store %i4 %R, %i4* %P
ret void
}
void %splat_h(short %tmp, <16 x ubyte>* %dst) {
%tmp = insertelement <8 x short> undef, short %tmp, uint 0
%tmp72 = insertelement <8 x short> %tmp, short %tmp, uint 1
%tmp73 = insertelement <8 x short> %tmp72, short %tmp, uint 2
%tmp74 = insertelement <8 x short> %tmp73, short %tmp, uint 3
%tmp75 = insertelement <8 x short> %tmp74, short %tmp, uint 4
%tmp76 = insertelement <8 x short> %tmp75, short %tmp, uint 5
%tmp77 = insertelement <8 x short> %tmp76, short %tmp, uint 6
%tmp78 = insertelement <8 x short> %tmp77, short %tmp, uint 7
%tmp78 = cast <8 x short> %tmp78 to <16 x ubyte>
store <16 x ubyte> %tmp78, <16 x ubyte>* %dst
ret void
}
void %spltish(<16 x ubyte>* %A, <16 x ubyte>* %B) {
; Gets converted to 16 x ubyte
%tmp = load <16 x ubyte>* %B
%tmp.s = cast <16 x ubyte> %tmp to <16 x sbyte>
%tmp4 = sub <16 x sbyte> %tmp.s, cast (<8 x short> < short 15, short 15, short 15, short 15, short 15, short 15, short 15, short 15 > to <16 x sbyte>)
%tmp4.u = cast <16 x sbyte> %tmp4 to <16 x ubyte>
store <16 x ubyte> %tmp4.u, <16 x ubyte>* %A
define void @splat_h(i16 %tmp, <16 x i8>* %dst) {
%tmp.upgrd.1 = insertelement <8 x i16> undef, i16 %tmp, i32 0
%tmp72 = insertelement <8 x i16> %tmp.upgrd.1, i16 %tmp, i32 1
%tmp73 = insertelement <8 x i16> %tmp72, i16 %tmp, i32 2
%tmp74 = insertelement <8 x i16> %tmp73, i16 %tmp, i32 3
%tmp75 = insertelement <8 x i16> %tmp74, i16 %tmp, i32 4
%tmp76 = insertelement <8 x i16> %tmp75, i16 %tmp, i32 5
%tmp77 = insertelement <8 x i16> %tmp76, i16 %tmp, i32 6
%tmp78 = insertelement <8 x i16> %tmp77, i16 %tmp, i32 7
%tmp78.upgrd.2 = bitcast <8 x i16> %tmp78 to <16 x i8>
store <16 x i8> %tmp78.upgrd.2, <16 x i8>* %dst
ret void
}
define void @spltish(<16 x i8>* %A, <16 x i8>* %B) {
%tmp = load <16 x i8>* %B ; <<16 x i8>> [#uses=1]
%tmp.s = bitcast <16 x i8> %tmp to <16 x i8> ; <<16 x i8>> [#uses=1]
%tmp4 = sub <16 x i8> %tmp.s, bitcast (<8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16
15, i16 15, i16 15 > to <16 x i8>) ; <<16 x i8>> [#uses=1]
%tmp4.u = bitcast <16 x i8> %tmp4 to <16 x i8> ; <<16 x i8>> [#uses=1]
store <16 x i8> %tmp4.u, <16 x i8>* %A
ret void
}

View File

@ -1,14 +1,14 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 -o %t -f
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 -o %t -f
; RUN: grep vrlw %t
; RUN: not grep spr %t
; RUN: not grep vrsave %t
<4 x int> %test_rol() {
ret <4 x int> < int -11534337, int -11534337, int -11534337, int -11534337 >
define <4 x i32> @test_rol() {
ret <4 x i32> < i32 -11534337, i32 -11534337, i32 -11534337, i32 -11534337 >
}
<4 x int> %test_arg(<4 x int> %A, <4 x int> %B) {
%C = add <4 x int> %A, %B
ret <4 x int> %C
define <4 x i32> @test_arg(<4 x i32> %A, <4 x i32> %B) {
%C = add <4 x i32> %A, %B ; <<4 x i32>> [#uses=1]
ret <4 x i32> %C
}

View File

@ -1,8 +1,9 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep vxor
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep vxor
void %foo(<4 x float> *%P) {
%T = load <4 x float> * %P
%S = add <4 x float> zeroinitializer, %T
store <4 x float> %S, <4 x float>* %P
ret void
define void @foo(<4 x float>* %P) {
%T = load <4 x float>* %P ; <<4 x float>> [#uses=1]
%S = add <4 x float> zeroinitializer, %T ; <<4 x float>> [#uses=1]
store <4 x float> %S, <4 x float>* %P
ret void
}

View File

@ -1,16 +1,17 @@
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep test:
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | not grep vperm
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | grep test:
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 | not grep vperm
void %test(<4 x float> *%tmp2.i) {
%tmp2.i = load <4x float>* %tmp2.i
%xFloat0.48 = extractelement <4 x float> %tmp2.i, uint 0 ; <float> [#uses=1]
%inFloat0.49 = insertelement <4 x float> undef, float %xFloat0.48, uint 0 ; <<4 x float>> [#uses=1]
%xFloat1.50 = extractelement <4 x float> %tmp2.i, uint 1 ; <float> [#uses=1]
%inFloat1.52 = insertelement <4 x float> %inFloat0.49, float %xFloat1.50, uint 1 ; <<4 x float>> [#uses=1]
%xFloat2.53 = extractelement <4 x float> %tmp2.i, uint 2 ; <float> [#uses=1]
%inFloat2.55 = insertelement <4 x float> %inFloat1.52, float %xFloat2.53, uint 2 ; <<4 x float>> [#uses=1]
%xFloat3.56 = extractelement <4 x float> %tmp2.i, uint 3 ; <float> [#uses=1]
%inFloat3.58 = insertelement <4 x float> %inFloat2.55, float %xFloat3.56, uint 3 ; <<4 x float>> [#uses=4]
store <4 x float> %inFloat3.58, <4x float>* %tmp2.i
ret void
define void @test(<4 x float>* %tmp2.i) {
%tmp2.i.upgrd.1 = load <4 x float>* %tmp2.i ; <<4 x float>> [#uses=4]
%xFloat0.48 = extractelement <4 x float> %tmp2.i.upgrd.1, i32 0 ; <float> [#uses=1]
%inFloat0.49 = insertelement <4 x float> undef, float %xFloat0.48, i32 0 ; <<4 x float>> [#uses=1]
%xFloat1.50 = extractelement <4 x float> %tmp2.i.upgrd.1, i32 1 ; <float> [#uses=1]
%inFloat1.52 = insertelement <4 x float> %inFloat0.49, float %xFloat1.50, i32 1 ; <<4 x float>> [#uses=1]
%xFloat2.53 = extractelement <4 x float> %tmp2.i.upgrd.1, i32 2 ; <float> [#uses=1]
%inFloat2.55 = insertelement <4 x float> %inFloat1.52, float %xFloat2.53, i32 2 ; <<4 x float>> [#uses=1]
%xFloat3.56 = extractelement <4 x float> %tmp2.i.upgrd.1, i32 3 ; <float> [#uses=1]
%inFloat3.58 = insertelement <4 x float> %inFloat2.55, float %xFloat3.56, i32 3 ; <<4 x float>> [#uses=1]
store <4 x float> %inFloat3.58, <4 x float>* %tmp2.i
ret void
}

View File

@ -1,156 +1,157 @@
; Test that vectors are scalarized/lowered correctly.
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5
; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g3
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g3
%d8 = type <8 x double>
%f1 = type <1 x float>
%f2 = type <2 x float>
%f4 = type <4 x float>
%i4 = type <4 x int>
%f8 = type <8 x float>
%d8 = type <8 x double>
implementation
%i4 = type <4 x i32>
;;; TEST HANDLING OF VARIOUS VECTOR SIZES
void %test_f1(%f1 *%P, %f1* %Q, %f1 *%S) {
%p = load %f1 *%P
%q = load %f1* %Q
%R = add %f1 %p, %q
store %f1 %R, %f1 *%S
ret void
define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) {
%p = load %f1* %P ; <%f1> [#uses=1]
%q = load %f1* %Q ; <%f1> [#uses=1]
%R = add %f1 %p, %q ; <%f1> [#uses=1]
store %f1 %R, %f1* %S
ret void
}
void %test_f2(%f2 *%P, %f2* %Q, %f2 *%S) {
%p = load %f2* %P
%q = load %f2* %Q
%R = add %f2 %p, %q
store %f2 %R, %f2 *%S
ret void
define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
%p = load %f2* %P ; <%f2> [#uses=1]
%q = load %f2* %Q ; <%f2> [#uses=1]
%R = add %f2 %p, %q ; <%f2> [#uses=1]
store %f2 %R, %f2* %S
ret void
}
void %test_f4(%f4 *%P, %f4* %Q, %f4 *%S) {
%p = load %f4* %P
%q = load %f4* %Q
%R = add %f4 %p, %q
store %f4 %R, %f4 *%S
ret void
define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) {
%p = load %f4* %P ; <%f4> [#uses=1]
%q = load %f4* %Q ; <%f4> [#uses=1]
%R = add %f4 %p, %q ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
void %test_f8(%f8 *%P, %f8* %Q, %f8 *%S) {
%p = load %f8* %P
%q = load %f8* %Q
%R = add %f8 %p, %q
store %f8 %R, %f8 *%S
ret void
define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) {
%p = load %f8* %P ; <%f8> [#uses=1]
%q = load %f8* %Q ; <%f8> [#uses=1]
%R = add %f8 %p, %q ; <%f8> [#uses=1]
store %f8 %R, %f8* %S
ret void
}
void %test_fmul(%f8 *%P, %f8* %Q, %f8 *%S) {
%p = load %f8* %P
%q = load %f8* %Q
%R = mul %f8 %p, %q
store %f8 %R, %f8 *%S
ret void
define void @test_fmul(%f8* %P, %f8* %Q, %f8* %S) {
%p = load %f8* %P ; <%f8> [#uses=1]
%q = load %f8* %Q ; <%f8> [#uses=1]
%R = mul %f8 %p, %q ; <%f8> [#uses=1]
store %f8 %R, %f8* %S
ret void
}
void %test_div(%f8 *%P, %f8* %Q, %f8 *%S) {
%p = load %f8* %P
%q = load %f8* %Q
%R = div %f8 %p, %q
store %f8 %R, %f8 *%S
ret void
define void @test_div(%f8* %P, %f8* %Q, %f8* %S) {
%p = load %f8* %P ; <%f8> [#uses=1]
%q = load %f8* %Q ; <%f8> [#uses=1]
%R = fdiv %f8 %p, %q ; <%f8> [#uses=1]
store %f8 %R, %f8* %S
ret void
}
;;; TEST VECTOR CONSTRUCTS
void %test_cst(%f4 *%P, %f4 *%S) {
%p = load %f4* %P
%R = add %f4 %p, <float 0x3FB99999A0000000, float 1.0, float 2.0, float 4.5>
store %f4 %R, %f4 *%S
ret void
define void @test_cst(%f4* %P, %f4* %S) {
%p = load %f4* %P ; <%f4> [#uses=1]
%R = add %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float
2.000000e+00, float 4.500000e+00 > ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
void %test_zero(%f4 *%P, %f4 *%S) {
%p = load %f4* %P
%R = add %f4 %p, zeroinitializer
store %f4 %R, %f4 *%S
ret void
define void @test_zero(%f4* %P, %f4* %S) {
%p = load %f4* %P ; <%f4> [#uses=1]
%R = add %f4 %p, zeroinitializer ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
void %test_undef(%f4 *%P, %f4 *%S) {
%p = load %f4* %P
%R = add %f4 %p, undef
store %f4 %R, %f4 *%S
ret void
define void @test_undef(%f4* %P, %f4* %S) {
%p = load %f4* %P ; <%f4> [#uses=1]
%R = add %f4 %p, undef ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
void %test_constant_insert(%f4 *%S) {
%R = insertelement %f4 zeroinitializer, float 10.0, uint 0
store %f4 %R, %f4 *%S
ret void
define void @test_constant_insert(%f4* %S) {
%R = insertelement %f4 zeroinitializer, float 1.000000e+01, i32 0
; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
void %test_variable_buildvector(float %F, %f4 *%S) {
%R = insertelement %f4 zeroinitializer, float %F, uint 0
store %f4 %R, %f4 *%S
ret void
define void @test_variable_buildvector(float %F, %f4* %S) {
%R = insertelement %f4 zeroinitializer, float %F, i32 0
store %f4 %R, %f4* %S
ret void
}
void %test_scalar_to_vector(float %F, %f4 *%S) {
%R = insertelement %f4 undef, float %F, uint 0 ;; R = scalar_to_vector F
store %f4 %R, %f4 *%S
ret void
define void @test_scalar_to_vector(float %F, %f4* %S) {
%R = insertelement %f4 undef, float %F, i32 0
store %f4 %R, %f4* %S
ret void
}
float %test_extract_elt(%f8 *%P) {
%p = load %f8* %P
%R = extractelement %f8 %p, uint 3
ret float %R
define float @test_extract_elt(%f8* %P) {
%p = load %f8* %P ; <%f8> [#uses=1]
%R = extractelement %f8 %p, i32 3 ; <float> [#uses=1]
ret float %R
}
double %test_extract_elt2(%d8 *%P) {
%p = load %d8* %P
%R = extractelement %d8 %p, uint 3
ret double %R
define double @test_extract_elt2(%d8* %P) {
%p = load %d8* %P ; <%d8> [#uses=1]
%R = extractelement %d8 %p, i32 3 ; <double> [#uses=1]
ret double %R
}
void %test_cast_1(<4 x float>* %b, <4 x int>* %a) {
%tmp = load <4 x float>* %b
%tmp2 = add <4 x float> %tmp, <float 1.0, float 2.0, float 3.0, float 4.0>
%tmp3 = cast <4 x float> %tmp2 to <4 x int>
%tmp4 = add <4 x int> %tmp3, <int 1, int 2, int 3, int 4>
store <4 x int> %tmp4, <4 x int>* %a
ret void
define void @test_cast_1(%f4* %b, %i4* %a) {
%tmp = load %f4* %b ; <%f4> [#uses=1]
%tmp2 = add %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float
3.000000e+00, float 4.000000e+00 > ; <%f4> [#uses=1]
%tmp3 = bitcast %f4 %tmp2 to %i4 ; <%i4> [#uses=1]
%tmp4 = add %i4 %tmp3, < i32 1, i32 2, i32 3, i32 4 >
store %i4 %tmp4, %i4* %a
ret void
}
void %test_cast_2(<8 x float>* %a, <8 x int>* %b) {
%T = load <8 x float>* %a
%T2 = cast <8 x float> %T to <8 x int>
store <8 x int> %T2, <8 x int>* %b
ret void
define void @test_cast_2(%f8* %a, <8 x i32>* %b) {
%T = load %f8* %a ; <%f8> [#uses=1]
%T2 = bitcast %f8 %T to <8 x i32>
store <8 x i32> %T2, <8 x i32>* %b
ret void
}
;;; TEST IMPORTANT IDIOMS
void %splat(%f4* %P, %f4* %Q, float %X) {
%tmp = insertelement %f4 undef, float %X, uint 0
%tmp2 = insertelement %f4 %tmp, float %X, uint 1
%tmp4 = insertelement %f4 %tmp2, float %X, uint 2
%tmp6 = insertelement %f4 %tmp4, float %X, uint 3
%q = load %f4* %Q
%R = add %f4 %q, %tmp6
define void @splat(%f4* %P, %f4* %Q, float %X) {
%tmp = insertelement %f4 undef, float %X, i32 0
%tmp2 = insertelement %f4 %tmp, float %X, i32 1
%tmp4 = insertelement %f4 %tmp2, float %X, i32 2
%tmp6 = insertelement %f4 %tmp4, float %X, i32 3
%q = load %f4* %Q ; <%f4> [#uses=1]
%R = add %f4 %q, %tmp6 ; <%f4> [#uses=1]
store %f4 %R, %f4* %P
ret void
}
void %splat_i4(%i4* %P, %i4* %Q, int %X) {
%tmp = insertelement %i4 undef, int %X, uint 0
%tmp2 = insertelement %i4 %tmp, int %X, uint 1
%tmp4 = insertelement %i4 %tmp2, int %X, uint 2
%tmp6 = insertelement %i4 %tmp4, int %X, uint 3
%q = load %i4* %Q
%R = add %i4 %q, %tmp6
define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) {
%tmp = insertelement %i4 undef, i32 %X, i32 0
%tmp2 = insertelement %i4 %tmp, i32 %X, i32 1
%tmp4 = insertelement %i4 %tmp2, i32 %X, i32 2
%tmp6 = insertelement %i4 %tmp4, i32 %X, i32 3
%q = load %i4* %Q ; <%i4> [#uses=1]
%R = add %i4 %q, %tmp6 ; <%i4> [#uses=1]
store %i4 %R, %i4* %P
ret void
}