// CHECK: define available_externally <2 x i64> @_mm_adds_epi16(<2 x i64> [[REG64:[0-9a-zA-Z_%.]+]], <2 x i64> [[REG65:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG64]], <2 x i64>* [[REG66:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x i64> [[REG65]], <2 x i64>* [[REG67:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG68:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG66]], align 16
// CHECK-NEXT: [[REG69:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG68]] to <8 x i16>
// CHECK-NEXT: [[REG70:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG67]], align 16
// CHECK-NEXT: [[REG71:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG70]] to <8 x i16>
// CHECK-NEXT: [[REG72:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_adds(short vector[8], short vector[8])(<8 x i16> [[REG69]], <8 x i16> [[REG71]])
// CHECK-NEXT: [[REG73:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG72]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG73]]
// CHECK: define available_externally <2 x i64> @_mm_adds_epi8(<2 x i64> [[REG74:[0-9a-zA-Z_%.]+]], <2 x i64> [[REG75:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG74]], <2 x i64>* [[REG76:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x i64> [[REG75]], <2 x i64>* [[REG77:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG78:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG76]], align 16
// CHECK-NEXT: [[REG79:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG78]] to <16 x i8>
// CHECK-NEXT: [[REG80:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG77]], align 16
// CHECK-NEXT: [[REG81:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG80]] to <16 x i8>
// CHECK-NEXT: [[REG82:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_adds(signed char vector[16], signed char vector[16])(<16 x i8> [[REG79]], <16 x i8> [[REG81]])
// CHECK-NEXT: [[REG83:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG82]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG83]]
// CHECK: define available_externally <2 x i64> @_mm_adds_epu16(<2 x i64> [[REG84:[0-9a-zA-Z_%.]+]], <2 x i64> [[REG85:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG84]], <2 x i64>* [[REG86:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x i64> [[REG85]], <2 x i64>* [[REG87:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG88:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG86]], align 16
// CHECK-NEXT: [[REG89:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG88]] to <8 x i16>
// CHECK-NEXT: [[REG90:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG87]], align 16
// CHECK-NEXT: [[REG91:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG90]] to <8 x i16>
// CHECK-NEXT: [[REG92:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_adds(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG89]], <8 x i16> [[REG91]])
// CHECK-NEXT: [[REG93:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG92]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG93]]
// CHECK: define available_externally <2 x i64> @_mm_adds_epu8(<2 x i64> [[REG94:[0-9a-zA-Z_%.]+]], <2 x i64> [[REG95:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG94]], <2 x i64>* [[REG96:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x i64> [[REG95]], <2 x i64>* [[REG97:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG98:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG96]], align 16
// CHECK-NEXT: [[REG99:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG98]] to <16 x i8>
// CHECK-NEXT: [[REG100:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG97]], align 16
// CHECK-NEXT: [[REG101:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG100]] to <16 x i8>
// CHECK-NEXT: [[REG102:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_adds(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG99]], <16 x i8> [[REG101]])
// CHECK-NEXT: [[REG103:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG102]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG103]]
void__attribute__((noinline))
test_avg(){
resi=_mm_avg_epu16(mi1,mi2);
resi=_mm_avg_epu8(mi1,mi2);
}
// CHECK-LABEL: @test_avg
// CHECK: define available_externally <2 x i64> @_mm_avg_epu16(<2 x i64> [[REG104:[0-9a-zA-Z_%.]+]], <2 x i64> [[REG105:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG104]], <2 x i64>* [[REG106:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x i64> [[REG105]], <2 x i64>* [[REG107:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG108:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG106]], align 16
// CHECK-NEXT: [[REG109:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG108]] to <8 x i16>
// CHECK-NEXT: [[REG110:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG107]], align 16
// CHECK-NEXT: [[REG111:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG110]] to <8 x i16>
// CHECK-NEXT: [[REG112:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_avg(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG109]], <8 x i16> [[REG111]])
// CHECK-NEXT: [[REG113:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG112]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG113]]
// CHECK: define available_externally <2 x i64> @_mm_avg_epu8(<2 x i64> [[REG114:[0-9a-zA-Z_%.]+]], <2 x i64> [[REG115:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG114]], <2 x i64>* [[REG116:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x i64> [[REG115]], <2 x i64>* [[REG117:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG118:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG116]], align 16
// CHECK-NEXT: [[REG119:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG118]] to <16 x i8>
// CHECK-NEXT: [[REG120:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG117]], align 16
// CHECK-NEXT: [[REG121:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG120]] to <16 x i8>
// CHECK-NEXT: [[REG122:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_avg(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG119]], <16 x i8> [[REG121]])
// CHECK-NEXT: [[REG123:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG122]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG123]]
void__attribute__((noinline))
test_bs(){
resi=_mm_bslli_si128(mi1,i);
resi=_mm_bsrli_si128(mi1,i);
}
// CHECK-LABEL: @test_bs
// CHECK: define available_externally <2 x i64> @_mm_bslli_si128(<2 x i64> [[REG124:[0-9a-zA-Z_%.]+]], i32 signext [[REG125:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG124]], <2 x i64>* [[REG126:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store i32 [[REG125]], i32* [[REG127:[0-9a-zA-Z_%.]+]], align 4
// CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* [[REG128:[0-9a-zA-Z_%.]+]], align 16
// CHECK-BE: [[REG182:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG180]], align 16
// CHECK-BE-NEXT: [[REG183:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG182]] to <2 x i64>
// CHECK-BE-NEXT: ret <2 x i64> [[REG183]]
void__attribute__((noinline))
test_cast(){
res=_mm_castpd_ps(md1);
resi=_mm_castpd_si128(md1);
resd=_mm_castps_pd(m1);
resi=_mm_castps_si128(m1);
resd=_mm_castsi128_pd(mi1);
res=_mm_castsi128_ps(mi1);
}
// CHECK-LABEL: @test_cast
// CHECK: define available_externally <4 x float> @_mm_castpd_ps(<2 x double> [[REG184:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x double> [[REG184]], <2 x double>* [[REG185:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG186:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG185]], align 16
// CHECK-NEXT: [[REG187:[0-9a-zA-Z_%.]+]] = bitcast <2 x double> [[REG186]] to <4 x float>
// CHECK-NEXT: ret <4 x float> [[REG187]]
// CHECK: define available_externally <2 x i64> @_mm_castpd_si128(<2 x double> [[REG188:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x double> [[REG188]], <2 x double>* [[REG189:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG190:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG189]], align 16
// CHECK-NEXT: [[REG191:[0-9a-zA-Z_%.]+]] = bitcast <2 x double> [[REG190]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG191]]
// CHECK: define available_externally <2 x double> @_mm_castps_pd(<4 x float> [[REG192:[0-9a-zA-Z_%.]+]])
// CHECK: store <4 x float> [[REG192]], <4 x float>* [[REG193:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG194:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG193]], align 16
// CHECK-NEXT: [[REG195:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG194]] to <2 x double>
// CHECK-NEXT: ret <2 x double> [[REG195]]
// CHECK: define available_externally <2 x i64> @_mm_castps_si128(<4 x float> [[REG196:[0-9a-zA-Z_%.]+]])
// CHECK: store <4 x float> [[REG196]], <4 x float>* [[REG197:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG198:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG197]], align 16
// CHECK-NEXT: [[REG199:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG198]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG199]]
// CHECK: define available_externally <2 x double> @_mm_castsi128_pd(<2 x i64> [[REG200:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG200]], <2 x i64>* [[REG201:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG202:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG201]], align 16
// CHECK-NEXT: [[REG203:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG202]] to <2 x double>
// CHECK-NEXT: ret <2 x double> [[REG203]]
// CHECK: define available_externally <4 x float> @_mm_castsi128_ps(<2 x i64> [[REG204:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG204]], <2 x i64>* [[REG205:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG206:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG205]], align 16
// CHECK-NEXT: [[REG207:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG206]] to <4 x float>
// CHECK-NEXT: ret <4 x float> [[REG207]]
void__attribute__((noinline))
test_cmp(){
resi=_mm_cmpeq_epi32(mi1,mi2);
resi=_mm_cmpeq_epi16(mi1,mi2);
resi=_mm_cmpeq_epi8(mi1,mi2);
resi=_mm_cmpgt_epi32(mi1,mi2);
resi=_mm_cmpgt_epi16(mi1,mi2);
resi=_mm_cmpgt_epi8(mi1,mi2);
resi=_mm_cmplt_epi32(mi1,mi2);
resi=_mm_cmplt_epi16(mi1,mi2);
resi=_mm_cmplt_epi8(mi1,mi2);
resd=_mm_cmpeq_pd(md1,md2);
resd=_mm_cmpeq_sd(md1,md2);
resd=_mm_cmpge_pd(md1,md2);
resd=_mm_cmpge_sd(md1,md2);
resd=_mm_cmpgt_pd(md1,md2);
resd=_mm_cmpgt_sd(md1,md2);
resd=_mm_cmple_pd(md1,md2);
resd=_mm_cmple_sd(md1,md2);
resd=_mm_cmplt_pd(md1,md2);
resd=_mm_cmplt_sd(md1,md2);
resd=_mm_cmpneq_pd(md1,md2);
resd=_mm_cmpneq_sd(md1,md2);
resd=_mm_cmpnge_pd(md1,md2);
resd=_mm_cmpnge_sd(md1,md2);
resd=_mm_cmpngt_pd(md1,md2);
resd=_mm_cmpngt_sd(md1,md2);
resd=_mm_cmpnle_pd(md1,md2);
resd=_mm_cmpnle_sd(md1,md2);
resd=_mm_cmpnlt_pd(md1,md2);
resd=_mm_cmpnlt_sd(md1,md2);
resd=_mm_cmpord_pd(md1,md2);
resd=_mm_cmpord_sd(md1,md2);
resd=_mm_cmpunord_pd(md1,md2);
resd=_mm_cmpunord_sd(md1,md2);
}
// CHECK-LABEL: @test_cmp
// CHECK: define available_externally <2 x i64> @_mm_cmpeq_epi32
// CHECK: [[REG208:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpeq(int vector[4], int vector[4])
// CHECK-NEXT: [[REG209:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG208]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG209]]
// CHECK: define available_externally <2 x i64> @_mm_cmpeq_epi16
// CHECK: [[REG210:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmpeq(short vector[8], short vector[8])(<8 x i16> {{[0-9a-zA-Z_%.]+}}, <8 x i16> {{[0-9a-zA-Z_%.]+}})
// CHECK-NEXT: [[REG211:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG210]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG211]]
// CHECK: define available_externally <2 x i64> @_mm_cmpeq_epi8
// CHECK: [[REG212:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmpeq(signed char vector[16], signed char vector[16])
// CHECK-NEXT: [[REG213:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG212]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG213]]
// CHECK: define available_externally <2 x i64> @_mm_cmpgt_epi32
// CHECK: [[REG214:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(int vector[4], int vector[4])
// CHECK-NEXT: [[REG215:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG214]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG215]]
// CHECK: define available_externally <2 x i64> @_mm_cmpgt_epi16
// CHECK: [[REG216:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])
// CHECK-NEXT: [[REG217:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG216]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG217]]
// CHECK: define available_externally <2 x i64> @_mm_cmpgt_epi8
// CHECK: [[REG218:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmpgt(signed char vector[16], signed char vector[16])
// CHECK-NEXT: [[REG219:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG218]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG219]]
// CHECK: define available_externally <2 x i64> @_mm_cmplt_epi32
// CHECK: [[REG220:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmplt(int vector[4], int vector[4])
// CHECK-NEXT: [[REG221:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG220]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG221]]
// CHECK: define available_externally <2 x i64> @_mm_cmplt_epi16
// CHECK: [[REG222:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmplt(short vector[8], short vector[8])
// CHECK-NEXT: [[REG223:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG222]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG223]]
// CHECK: define available_externally <2 x i64> @_mm_cmplt_epi8
// CHECK: [[REG224:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmplt(signed char vector[16], signed char vector[16])
// CHECK-NEXT: [[REG225:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG224]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG225]]
// CHECK: define available_externally <2 x double> @_mm_cmpeq_pd
// CHECK: define available_externally <2 x double> @_mm_cmpord_pd(<2 x double> [[REG485:[0-9a-zA-Z_%.]+]], <2 x double> [[REG486:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x double> [[REG485]], <2 x double>* [[REG487:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x double> [[REG486]], <2 x double>* [[REG488:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG489:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG487]], align 16
// CHECK-NEXT: [[REG490:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG487]], align 16
// CHECK-NEXT: [[REG491:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])(<2 x double> [[REG489]], <2 x double> [[REG490]])
// CHECK-NEXT: store <2 x i64> [[REG491]], <2 x i64>* [[REG492:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG493:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG488]], align 16
// CHECK-NEXT: [[REG494:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG488]], align 16
// CHECK-NEXT: [[REG495:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])(<2 x double> [[REG493]], <2 x double> [[REG494]])
// CHECK-NEXT: store <2 x i64> [[REG495]], <2 x i64>* [[REG496:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG497:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG492]], align 16
// CHECK-NEXT: [[REG498:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG496]], align 16
// CHECK-NEXT: [[REG499:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_and(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> [[REG497]], <2 x i64> [[REG498]])
// CHECK-NEXT: [[REG500:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG499]] to <2 x double>
// CHECK-NEXT: ret <2 x double> [[REG500]]
// CHECK: define available_externally <2 x double> @_mm_cmpord_sd(<2 x double> [[REG501:[0-9a-zA-Z_%.]+]], <2 x double> [[REG502:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x double> [[REG501]], <2 x double>* [[REG503:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x double> [[REG502]], <2 x double>* [[REG504:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG505:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG503]], align 16
// CHECK: define available_externally <2 x double> @_mm_cmpunord_pd(<2 x double> [[REG518:[0-9a-zA-Z_%.]+]], <2 x double> [[REG519:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x double> [[REG518]], <2 x double>* [[REG520:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x double> [[REG519]], <2 x double>* [[REG521:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG522:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG520]], align 16
// CHECK-NEXT: [[REG523:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG520]], align 16
// CHECK-NEXT: [[REG524:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])(<2 x double> [[REG522]], <2 x double> [[REG523]])
// CHECK-NEXT: store <2 x i64> [[REG524]], <2 x i64>* [[REG525:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG526:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG521]], align 16
// CHECK-NEXT: [[REG527:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG521]], align 16
// CHECK-NEXT: [[REG528:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])(<2 x double> [[REG526]], <2 x double> [[REG527]])
// CHECK-NEXT: store <2 x i64> [[REG528]], <2 x i64>* [[REG529:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG530:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG525]], align 16
// CHECK-NEXT: [[REG531:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG525]], align 16
// CHECK-NEXT: [[REG532:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_nor(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> [[REG530]], <2 x i64> [[REG531]])
// CHECK-NEXT: store <2 x i64> [[REG532]], <2 x i64>* [[REG525]], align 16
// CHECK-NEXT: [[REG533:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG525]], align 16
// CHECK-NEXT: [[REG534:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG529]], align 16
// CHECK-NEXT: [[REG535:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_orc(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> [[REG533]], <2 x i64> [[REG534]])
// CHECK-NEXT: [[REG536:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG535]] to <2 x double>
// CHECK-NEXT: ret <2 x double> [[REG536]]
// CHECK: define available_externally <2 x double> @_mm_cmpunord_sd(<2 x double> [[REG537:[0-9a-zA-Z_%.]+]], <2 x double> [[REG538:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x double> [[REG537]], <2 x double>* [[REG539:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x double> [[REG538]], <2 x double>* [[REG540:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG541:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG539]], align 16
// CHECK: define available_externally <4 x float> @_mm_cvtepi32_ps(<2 x i64> [[REG588:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG588]], <2 x i64>* [[REG589:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG590:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG589]], align 16
// CHECK-NEXT: [[REG591:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG590]] to <4 x i32>
// CHECK-NEXT: [[REG592:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG591]], i32 0)
// CHECK-NEXT: ret <4 x float> [[REG592]]
// CHECK: define available_externally <2 x i64> @_mm_cvtpd_epi32(<2 x double> [[REG593:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x double> [[REG593]], <2 x double>* [[REG594:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: %[[REG595:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG594]], align 16
// CHECK-NEXT: [[REG596:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_rint(double vector[2])(<2 x double> %[[REG595:[0-9a-zA-Z_%.]+]])
// CHECK-NEXT: store <2 x double> [[REG596]], <2 x double>* [[REG597:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[REG598:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG599:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG597]], align 16
// CHECK-NEXT: [[REG600:[0-9a-zA-Z_%.]+]] = call <4 x i32> asm "xvcvdpsxws ${0:x},${1:x}", "=^wa,^wa"(<2 x double> [[REG599]])
// CHECK-NEXT: store <4 x i32> [[REG600]], <4 x i32>* [[REG601:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG602:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG601]], align 16
// CHECK-NEXT: [[REG603:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG601]], align 16
// CHECK-NEXT: [[REG604:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_mergeo(int vector[4], int vector[4])(<4 x i32> [[REG602]], <4 x i32> [[REG603]])
// CHECK-NEXT: store <4 x i32> [[REG604]], <4 x i32>* [[REG601]], align 16
// CHECK-NEXT: [[REG605:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG601]], align 16
// CHECK-NEXT: [[REG606:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG605]] to <2 x i64>
// CHECK-NEXT: [[REG607:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vpkudum(long long vector[2], long long vector[2])(<2 x i64> [[REG606]], <2 x i64> zeroinitializer)
// CHECK-NEXT: store <4 x i32> [[REG607]], <4 x i32>* [[REG608:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG609:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG608]], align 16
// CHECK-NEXT: [[REG610:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG609]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG610]]
// CHECK: define available_externally i64 @_mm_cvtpd_pi32(<2 x double> [[REG611:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x double> [[REG611]], <2 x double>* [[REG612:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG613:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG612]], align 16
// CHECK-NEXT: [[REG614:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_cvtpd_epi32(<2 x double> [[REG613]])
// CHECK-NEXT: store <2 x i64> [[REG614]], <2 x i64>* [[REG615:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG616:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG615]], align 16
// CHECK: define available_externally <4 x float> @_mm_cvtpd_ps(<2 x double> [[REG618:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x double> [[REG618]], <2 x double>* [[REG619:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[REG620:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: %[[REG621:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG619]], align 16
// CHECK-NEXT: [[REG622:[0-9a-zA-Z_%.]+]] = call <4 x i32> asm "xvcvdpsp ${0:x},${1:x}", "=^wa,^wa"(<2 x double> %[[REG621:[0-9a-zA-Z_%.]+]])
// CHECK-NEXT: store <4 x i32> [[REG622]], <4 x i32>* [[REG623:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG624:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG623]], align 16
// CHECK-NEXT: [[REG625:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG623]], align 16
// CHECK-NEXT: [[REG626:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_mergeo(int vector[4], int vector[4])(<4 x i32> [[REG624]], <4 x i32> [[REG625]])
// CHECK-NEXT: store <4 x i32> [[REG626]], <4 x i32>* [[REG623]], align 16
// CHECK-NEXT: [[REG627:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG623]], align 16
// CHECK-NEXT: [[REG628:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG627]] to <2 x i64>
// CHECK-NEXT: [[REG629:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vpkudum(long long vector[2], long long vector[2])(<2 x i64> [[REG628]], <2 x i64> zeroinitializer)
// CHECK-NEXT: [[REG630:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG629]] to <4 x float>
// CHECK-NEXT: store <4 x float> [[REG630]], <4 x float>* [[REG631:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG632:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG631]], align 16
// CHECK-NEXT: ret <4 x float> [[REG632]]
// CHECK: define available_externally <2 x double> @_mm_cvtpi32_pd(i64 [[REG633:[0-9a-zA-Z_%.]+]])
// CHECK: store i64 [[REG633]], i64* [[REG634:[0-9a-zA-Z_%.]+]], align 8
// CHECK-BE-NEXT: store <2 x double> [[REG814]], <2 x double>* [[REG809]], align 16
// CHECK-BE-NEXT: [[REG815:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG809]], align 16
// CHECK-BE-NEXT: ret <2 x double> [[REG815]]
// CHECK-LE-NEXT: [[REG816:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG807]], align 16
// CHECK-LE-NEXT: [[REG817:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG816:[0-9a-zA-Z_%.]+]], i32 zeroext 0)
// CHECK-LE-NEXT: store <4 x float> [[REG817]], <4 x float>* [[REG818:[0-9a-zA-Z_%.]+]], align 16
// CHECK-LE-NEXT: [[REG819:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG818]], align 16
// CHECK-LE-NEXT: [[REG820:[0-9a-zA-Z_%.]+]] = call <2 x double> asm "xscvspdp ${0:x},${1:x}", "=^wa,^wa"(<4 x float> [[REG819]])
// CHECK-LE-NEXT: store <2 x double> [[REG820]], <2 x double>* [[REG809:[0-9a-zA-Z_%.]+]], align 16
// CHECK-LE-NEXT: [[REG821:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG809]], align 16
// CHECK-LE-NEXT: [[REG822:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG806]], align 16
// CHECK-LE-NEXT: [[REG823:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_mergel(double vector[2], double vector[2])(<2 x double> [[REG821]], <2 x double> [[REG822]])
// CHECK-LE-NEXT: ret <2 x double> [[REG823]]
// CHECK: define available_externally <2 x i64> @_mm_cvttpd_epi32(<2 x double> [[REG824:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x double> [[REG824]], <2 x double>* [[REG825:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[REG826:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: %[[REG827:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG825]], align 16
// CHECK-NEXT: [[REG828:[0-9a-zA-Z_%.]+]] = call <4 x i32> asm "xvcvdpsxws ${0:x},${1:x}", "=^wa,^wa"(<2 x double> %[[REG827:[0-9a-zA-Z_%.]+]])
// CHECK-NEXT: store <4 x i32> [[REG828]], <4 x i32>* [[REG829:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG830:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG829]], align 16
// CHECK-NEXT: [[REG831:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG829]], align 16
// CHECK-NEXT: [[REG832:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_mergeo(int vector[4], int vector[4])(<4 x i32> [[REG830]], <4 x i32> [[REG831]])
// CHECK-NEXT: store <4 x i32> [[REG832]], <4 x i32>* [[REG829]], align 16
// CHECK-NEXT: [[REG833:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG829]], align 16
// CHECK-NEXT: [[REG834:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG833]] to <2 x i64>
// CHECK-NEXT: [[REG835:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vpkudum(long long vector[2], long long vector[2])(<2 x i64> [[REG834]], <2 x i64> zeroinitializer)
// CHECK-NEXT: store <4 x i32> [[REG835]], <4 x i32>* [[REG836:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG837:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG836]], align 16
// CHECK-NEXT: [[REG838:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG837]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG838]]
// CHECK: define available_externally i64 @_mm_cvttpd_pi32(<2 x double> [[REG839:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x double> [[REG839]], <2 x double>* [[REG840:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG841:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG840]], align 16
// CHECK-NEXT: [[REG842:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_cvttpd_epi32(<2 x double> [[REG841]])
// CHECK-NEXT: store <2 x i64> [[REG842]], <2 x i64>* [[REG843:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG844:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG843]], align 16
// CHECK: define available_externally <2 x i64> @_mm_loadu_si128(<2 x i64>* [[REG984:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64>* [[REG984]], <2 x i64>** [[REG985:[0-9a-zA-Z_%.]+]], align 8
// CHECK-NEXT: [[REG986:[0-9a-zA-Z_%.]+]] = load <2 x i64>*, <2 x i64>** [[REG985]], align 8
// CHECK-NEXT: [[REG987:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64>* [[REG986]] to i32*
// CHECK-NEXT: [[REG988:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vsx_ld(int, int const*)(i32 signext 0, i32* [[REG987]])
// CHECK-NEXT: [[REG989:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG988]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG989]]
void__attribute__((noinline))
test_logical(){
resd=_mm_and_pd(md1,md2);
resi=_mm_and_si128(mi1,mi2);
resd=_mm_andnot_pd(md1,md2);
resi=_mm_andnot_si128(mi1,mi2);
resd=_mm_xor_pd(md1,md2);
resi=_mm_xor_si128(mi1,mi2);
resd=_mm_or_pd(md1,md2);
resi=_mm_or_si128(mi1,mi2);
}
// CHECK-LABEL: @test_logical
// CHECK: define available_externally <2 x double> @_mm_and_pd(<2 x double> [[REG990:[0-9a-zA-Z_%.]+]], <2 x double> [[REG991:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x double> [[REG990]], <2 x double>* [[REG992:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x double> [[REG991]], <2 x double>* [[REG993:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG994:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG992]], align 16
// CHECK-NEXT: [[REG995:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG993]], align 16
// CHECK-NEXT: [[REG996:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_and(double vector[2], double vector[2])(<2 x double> [[REG994]], <2 x double> [[REG995]])
// CHECK-NEXT: ret <2 x double> [[REG996]]
// CHECK: define available_externally <2 x i64> @_mm_and_si128(<2 x i64> [[REG997:[0-9a-zA-Z_%.]+]], <2 x i64> [[REG998:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG997]], <2 x i64>* [[REG999:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x i64> [[REG998]], <2 x i64>* [[REG1000:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1001:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG999]], align 16
// CHECK-NEXT: [[REG1002:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1000]], align 16
// CHECK-NEXT: [[REG1003:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_and(long long vector[2], long long vector[2])(<2 x i64> [[REG1001]], <2 x i64> [[REG1002]])
// CHECK-NEXT: ret <2 x i64> [[REG1003]]
// CHECK: define available_externally <2 x double> @_mm_andnot_pd(<2 x double> [[REG1004:[0-9a-zA-Z_%.]+]], <2 x double> [[REG1005:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x double> [[REG1004]], <2 x double>* [[REG1006:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x double> [[REG1005]], <2 x double>* [[REG1007:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1008:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1007]], align 16
// CHECK-NEXT: [[REG1009:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1006]], align 16
// CHECK-NEXT: [[REG1010:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_andc(double vector[2], double vector[2])(<2 x double> [[REG1008]], <2 x double> [[REG1009]])
// CHECK-NEXT: ret <2 x double> [[REG1010]]
// CHECK: define available_externally <2 x i64> @_mm_andnot_si128(<2 x i64> [[REG1011:[0-9a-zA-Z_%.]+]], <2 x i64> [[REG1012:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG1011]], <2 x i64>* [[REG1013:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x i64> [[REG1012]], <2 x i64>* [[REG1014:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1015:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1014]], align 16
// CHECK-NEXT: [[REG1016:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1013]], align 16
// CHECK-NEXT: [[REG1017:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_andc(long long vector[2], long long vector[2])(<2 x i64> [[REG1015]], <2 x i64> [[REG1016]])
// CHECK-NEXT: ret <2 x i64> [[REG1017]]
// CHECK: define available_externally <2 x double> @_mm_xor_pd(<2 x double> [[REG1018:[0-9a-zA-Z_%.]+]], <2 x double> [[REG1019:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x double> [[REG1018]], <2 x double>* [[REG1020:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x double> [[REG1019]], <2 x double>* [[REG1021:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1022:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1020]], align 16
// CHECK-NEXT: [[REG1023:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1021]], align 16
// CHECK-NEXT: [[REG1024:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_xor(double vector[2], double vector[2])(<2 x double> [[REG1022]], <2 x double> [[REG1023]])
// CHECK-NEXT: ret <2 x double> [[REG1024]]
// CHECK: define available_externally <2 x i64> @_mm_xor_si128(<2 x i64> [[REG1025:[0-9a-zA-Z_%.]+]], <2 x i64> [[REG1026:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG1025]], <2 x i64>* [[REG1027:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x i64> [[REG1026]], <2 x i64>* [[REG1028:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1029:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1027]], align 16
// CHECK-NEXT: [[REG1030:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1028]], align 16
// CHECK-NEXT: [[REG1031:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_xor(long long vector[2], long long vector[2])(<2 x i64> [[REG1029]], <2 x i64> [[REG1030]])
// CHECK-NEXT: ret <2 x i64> [[REG1031]]
// CHECK: define available_externally <2 x double> @_mm_or_pd(<2 x double> [[REG1032:[0-9a-zA-Z_%.]+]], <2 x double> [[REG1033:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x double> [[REG1032]], <2 x double>* [[REG1034:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x double> [[REG1033]], <2 x double>* [[REG1035:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1036:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1034]], align 16
// CHECK-NEXT: [[REG1037:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1035]], align 16
// CHECK-NEXT: [[REG1038:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_or(double vector[2], double vector[2])(<2 x double> [[REG1036]], <2 x double> [[REG1037]])
// CHECK-NEXT: ret <2 x double> [[REG1038]]
// CHECK: define available_externally <2 x i64> @_mm_or_si128(<2 x i64> [[REG1039:[0-9a-zA-Z_%.]+]], <2 x i64> [[REG1040:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG1039]], <2 x i64>* [[REG1041:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x i64> [[REG1040]], <2 x i64>* [[REG1042:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1043:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1041]], align 16
// CHECK-NEXT: [[REG1044:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1042]], align 16
// CHECK-NEXT: [[REG1045:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_or(long long vector[2], long long vector[2])(<2 x i64> [[REG1043]], <2 x i64> [[REG1044]])
// CHECK-NEXT: ret <2 x i64> [[REG1045]]
void__attribute__((noinline))
test_max(){
resi=_mm_max_epi16(mi1,mi2);
resi=_mm_max_epu8(mi1,mi2);
resd=_mm_max_pd(md1,md2);
resd=_mm_max_sd(md1,md2);
}
// CHECK-LABEL: @test_max
// CHECK: define available_externally <2 x i64> @_mm_max_epi16
// CHECK: [[REG1046:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_max(short vector[8], short vector[8])
// CHECK-NEXT: [[REG1047:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG1046]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG1047]]
// CHECK: define available_externally <2 x i64> @_mm_max_epu8
// CHECK-NEXT: [[REG1238:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1235]], align 16
// CHECK-NEXT: [[REG1239:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1238]] to <8 x i16>
// CHECK-NEXT: [[REG1240:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1236]], align 16
// CHECK-NEXT: [[REG1241:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1240]] to <8 x i16>
// CHECK-NEXT: [[REG1242:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmuleuh(<8 x i16> [[REG1239]], <8 x i16> [[REG1241]])
// CHECK-NEXT: store <4 x i32> [[REG1242]], <4 x i32>* [[REG1243:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1244:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1235]], align 16
// CHECK-NEXT: [[REG1245:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1244]] to <8 x i16>
// CHECK-NEXT: [[REG1246:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1236]], align 16
// CHECK-NEXT: [[REG1247:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1246]] to <8 x i16>
// CHECK-NEXT: [[REG1248:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmulouh(<8 x i16> [[REG1245]], <8 x i16> [[REG1247]])
// CHECK-NEXT: store <4 x i32> [[REG1248]], <4 x i32>* [[REG1249:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1250:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1243]], align 16
// CHECK-NEXT: [[REG1251:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1249]], align 16
// CHECK-NEXT: [[REG1252:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG1237]], align 16
// CHECK-NEXT: [[REG1253:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_perm(unsigned int vector[4], unsigned int vector[4], unsigned char vector[16])(<4 x i32> [[REG1250]], <4 x i32> [[REG1251]], <16 x i8> [[REG1252]])
// CHECK-NEXT: [[REG1254:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1253]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG1254]]
// CHECK: define available_externally <2 x i64> @_mm_mullo_epi16(<2 x i64> [[REG1255:[0-9a-zA-Z_%.]+]], <2 x i64> [[REG1256:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG1255]], <2 x i64>* [[REG1257:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x i64> [[REG1256]], <2 x i64>* [[REG1258:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1259:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1257]], align 16
// CHECK-NEXT: [[REG1260:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1259]] to <8 x i16>
// CHECK-NEXT: [[REG1261:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1258]], align 16
// CHECK-NEXT: [[REG1262:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1261]] to <8 x i16>
// CHECK-NEXT: [[REG1263:[0-9a-zA-Z_%.]+]] = mul <8 x i16> [[REG1260]], [[REG1262]]
// CHECK-NEXT: [[REG1264:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG1263]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG1264]]
void__attribute__((noinline))
test_pack(){
resi=_mm_packs_epi16(mi1,mi2);
resi=_mm_packs_epi32(mi1,mi2);
resi=_mm_packus_epi16(mi1,mi2);
}
// CHECK-LABEL: @test_pack
// CHECK: define available_externally <2 x i64> @_mm_packs_epi16
// CHECK: [[REG1265:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_packs(short vector[8], short vector[8])
// CHECK-NEXT: [[REG1266:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG1265]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG1266]]
// CHECK: define available_externally <2 x i64> @_mm_packs_epi32
// CHECK: [[REG1267:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_packs(int vector[4], int vector[4])
// CHECK-NEXT: [[REG1268:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG1267]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG1268]]
// CHECK: define available_externally <2 x i64> @_mm_packus_epi16
// CHECK: [[REG1269:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_packsu(short vector[8], short vector[8])
// CHECK-NEXT: [[REG1270:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG1269]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG1270]]
void__attribute__((noinline))
test_sad(){
resi=_mm_sad_epu8(mi1,mi2);
}
// CHECK-LABEL: @test_sad
// CHECK: define available_externally <2 x i64> @_mm_sad_epu8
// CHECK-NEXT: [[REG1505:[0-9a-zA-Z_%.]+]] = and i32 [[REG1504]], 3
// CHECK-NEXT: [[REG1506:[0-9a-zA-Z_%.]+]] = sext i32 [[REG1505]] to i64
// CHECK-NEXT: store i64 [[REG1506]], i64* [[REG1507:[0-9a-zA-Z_%.]+]], align 8
// CHECK-LE-NEXT: store <2 x i64> <i64 0, i64 2242261671028070680>, <2 x i64>* [[REG1508:[0-9a-zA-Z_%.]+]], align 16
// CHECK-BE-NEXT: store <2 x i64> <i64 0, i64 1736447835066146335>, <2 x i64>* [[REG1508:[0-9a-zA-Z_%.]+]], align 16
// CHECK-COUNT-4: [[REG1509:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shufflelo_epi16.permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}}
// CHECK: [[REG1510:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])
// CHECK-NEXT: store <2 x i64> [[REG1510]], <2 x i64>* [[REG1511:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1512:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1511]], align 16
// CHECK-NEXT: ret <2 x i64> [[REG1512]]
void__attribute__((noinline))
test_sll(){
resi=_mm_sll_epi16(mi1,mi2);
resi=_mm_sll_epi32(mi1,mi2);
resi=_mm_sll_epi64(mi1,mi2);
resi=_mm_slli_epi16(mi1,i);
resi=_mm_slli_epi32(mi1,i);
resi=_mm_slli_epi64(mi1,i);
resi=_mm_slli_si128(mi1,i);
}
// CHECK-LABEL: @test_sll
// CHECK: define available_externally <2 x i64> @_mm_sll_epi16(<2 x i64> [[REG1513:[0-9a-zA-Z_%.]+]], <2 x i64> [[REG1514:[0-9a-zA-Z_%.]+]])
// CHECK: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, <8 x i16>* [[REG1515:[0-9a-zA-Z_%.]+]], align 16
// CHECK-LE: [[REG1516:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)
// CHECK-BE: [[REG1516:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)
// CHECK-NEXT: store <8 x i16> [[REG1516]], <8 x i16>* [[REG1517:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1518:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1517]], align 16
// CHECK-NEXT: [[REG1519:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG1518]], <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
// CHECK-NEXT: store <8 x i16> [[REG1519]], <8 x i16>* [[REG1520:[0-9a-zA-Z_%.]+]], align 16
// CHECK: [[REG1521:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sl(unsigned short vector[8], unsigned short vector[8])
// CHECK-NEXT: store <8 x i16> [[REG1521]], <8 x i16>* [[REG1522:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1523:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1520]], align 16
// CHECK-NEXT: [[REG1524:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1522]], align 16
// CHECK-NEXT: [[REG1525:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1520]], align 16
// CHECK-NEXT: [[REG1526:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sel(unsigned short vector[8], unsigned short vector[8], bool vector[8])(<8 x i16> [[REG1523]], <8 x i16> [[REG1524]], <8 x i16> [[REG1525]])
// CHECK-NEXT: store <8 x i16> [[REG1526]], <8 x i16>* [[REG1522]], align 16
// CHECK-NEXT: [[REG1527:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1522]], align 16
// CHECK-NEXT: [[REG1528:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG1527]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG1528]]
// CHECK: define available_externally <2 x i64> @_mm_sll_epi32(<2 x i64> [[REG1529:[0-9a-zA-Z_%.]+]], <2 x i64> [[REG1530:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG1529]], <2 x i64>* [[REG1531:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x i64> [[REG1530]], <2 x i64>* [[REG1532:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <4 x i32> <i32 32, i32 32, i32 32, i32 32>, <4 x i32>* [[REG1533:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1534:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1532]], align 16
// CHECK-NEXT: [[REG1535:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1534]] to <4 x i32>
// CHECK-LE-NEXT: [[REG1536:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> [[REG1535]], i32 zeroext 0)
// CHECK-BE-NEXT: [[REG1536:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> [[REG1535]], i32 zeroext 1)
// CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
// CHECK: [[REG1537:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sl(unsigned int vector[4], unsigned int vector[4])
// CHECK-NEXT: store <4 x i32> [[REG1537]], <4 x i32>* [[REG1538:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1539:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
// CHECK-NEXT: [[REG1540:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1538]], align 16
// CHECK-NEXT: [[REG1541:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
// CHECK-NEXT: [[REG1542:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sel(unsigned int vector[4], unsigned int vector[4], bool vector[4])(<4 x i32> [[REG1539]], <4 x i32> [[REG1540]], <4 x i32> [[REG1541]])
// CHECK-NEXT: store <4 x i32> [[REG1542]], <4 x i32>* [[REG1538]], align 16
// CHECK-NEXT: [[REG1543:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1538]], align 16
// CHECK-NEXT: [[REG1544:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1543]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG1544]]
// CHECK: define available_externally <2 x i64> @_mm_sll_epi64
// CHECK: call <2 x i64> @vec_splat(unsigned long long vector[2], unsigned int)(<2 x i64> {{[0-9a-zA-Z_%.]+}}, i32 zeroext 0)
// CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> {{[0-9a-zA-Z_%.]+}}, <2 x i64> <i64 64, i64 64>)
// CHECK: call <2 x i64> @vec_sl(unsigned long long vector[2], unsigned long long vector[2])
// CHECK-NEXT: [[REG1571:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1551]], align 16
// CHECK-NEXT: [[REG1572:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1571]] to <8 x i16>
// CHECK-NEXT: [[REG1573:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1566]], align 16
// CHECK-NEXT: [[REG1574:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sl(short vector[8], unsigned short vector[8])(<8 x i16> [[REG1572]], <8 x i16> [[REG1573]])
// CHECK-NEXT: store <8 x i16> [[REG1574]], <8 x i16>* [[REG1553]], align 16
// CHECK-NEXT: [[REG1601:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1579]], align 16
// CHECK-NEXT: [[REG1602:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1601]] to <4 x i32>
// CHECK-NEXT: [[REG1603:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1597]], align 16
// CHECK-NEXT: [[REG1604:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sl(int vector[4], unsigned int vector[4])(<4 x i32> [[REG1602]], <4 x i32> [[REG1603]])
// CHECK-NEXT: store <4 x i32> [[REG1604]], <4 x i32>* [[REG1581]], align 16
// CHECK-NEXT: [[REG1633:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1609]], align 16
// CHECK-NEXT: [[REG1634:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1628]], align 16
// CHECK-NEXT: [[REG1635:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_sl(long long vector[2], unsigned long long vector[2])(<2 x i64> [[REG1633]], <2 x i64> [[REG1634]])
// CHECK-NEXT: store <2 x i64> [[REG1635]], <2 x i64>* [[REG1611]], align 16
// CHECK: define available_externally <2 x i64> @_mm_sra_epi16(<2 x i64> [[REG1670:[0-9a-zA-Z_%.]+]], <2 x i64> [[REG1671:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG1670]], <2 x i64>* [[REG1672:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x i64> [[REG1671]], <2 x i64>* [[REG1673:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, <8 x i16>* [[REG1674:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1675:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1673]], align 16
// CHECK-NEXT: [[REG1676:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1675]] to <8 x i16>
// CHECK-LE-NEXT: [[REG1677:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> [[REG1676]], i32 zeroext 0)
// CHECK-BE-NEXT: [[REG1677:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> [[REG1676]], i32 zeroext 3)
// CHECK-NEXT: store <8 x i16> [[REG1677]], <8 x i16>* [[REG1678:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1679:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1678]], align 16
// CHECK-NEXT: [[REG1680:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_min(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG1679]], <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
// CHECK-NEXT: store <8 x i16> [[REG1680]], <8 x i16>* [[REG1678]], align 16
// CHECK-NEXT: [[REG1681:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1672]], align 16
// CHECK-NEXT: [[REG1682:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1681]] to <8 x i16>
// CHECK-NEXT: [[REG1683:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1678]], align 16
// CHECK-NEXT: [[REG1684:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8])(<8 x i16> [[REG1682]], <8 x i16> [[REG1683]])
// CHECK-NEXT: store <8 x i16> [[REG1684]], <8 x i16>* [[REG1685:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1686:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1685]], align 16
// CHECK-NEXT: [[REG1687:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG1686]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG1687]]
// CHECK: define available_externally <2 x i64> @_mm_sra_epi32(<2 x i64> [[REG1688:[0-9a-zA-Z_%.]+]], <2 x i64> [[REG1689:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG1688]], <2 x i64>* [[REG1690:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x i64> [[REG1689]], <2 x i64>* [[REG1691:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <4 x i32> <i32 31, i32 31, i32 31, i32 31>, <4 x i32>* [[REG1692:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1693:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1691]], align 16
// CHECK-NEXT: [[REG1694:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1693]] to <4 x i32>
// CHECK-LE-NEXT: [[REG1695:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> [[REG1694]], i32 zeroext 0)
// CHECK-BE-NEXT: [[REG1695:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> [[REG1694]], i32 zeroext 1)
// CHECK-NEXT: store <4 x i32> [[REG1695]], <4 x i32>* [[REG1696:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1697:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1696]], align 16
// CHECK-NEXT: [[REG1698:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_min(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG1697]], <4 x i32> <i32 31, i32 31, i32 31, i32 31>)
// CHECK-NEXT: store <4 x i32> [[REG1698]], <4 x i32>* [[REG1696]], align 16
// CHECK-NEXT: [[REG1699:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1690]], align 16
// CHECK-NEXT: [[REG1700:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1699]] to <4 x i32>
// CHECK-NEXT: [[REG1701:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1696]], align 16
// CHECK-NEXT: [[REG1702:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sra(int vector[4], unsigned int vector[4])(<4 x i32> [[REG1700]], <4 x i32> [[REG1701]])
// CHECK-NEXT: store <4 x i32> [[REG1702]], <4 x i32>* [[REG1703:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1704:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1703]], align 16
// CHECK-NEXT: [[REG1705:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1704]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG1705]]
// CHECK: define available_externally <2 x i64> @_mm_srai_epi16(<2 x i64> [[REG1706:[0-9a-zA-Z_%.]+]], i32 signext [[REG1707:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG1706]], <2 x i64>* [[REG1708:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store i32 [[REG1707]], i32* [[REG1709:[0-9a-zA-Z_%.]+]], align 4
// CHECK-NEXT: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, <8 x i16>* [[REG1710:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1724:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1708]], align 16
// CHECK-NEXT: [[REG1725:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1724]] to <8 x i16>
// CHECK-NEXT: [[REG1726:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1710]], align 16
// CHECK-NEXT: [[REG1727:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8])(<8 x i16> [[REG1725]], <8 x i16> [[REG1726]])
// CHECK-NEXT: store <8 x i16> [[REG1727]], <8 x i16>* [[REG1728:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1729:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1728]], align 16
// CHECK-NEXT: [[REG1730:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG1729]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG1730]]
// CHECK: define available_externally <2 x i64> @_mm_srai_epi32(<2 x i64> [[REG1731:[0-9a-zA-Z_%.]+]], i32 signext [[REG1732:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG1731]], <2 x i64>* [[REG1733:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store i32 [[REG1732]], i32* [[REG1734:[0-9a-zA-Z_%.]+]], align 4
// CHECK-NEXT: store <4 x i32> <i32 31, i32 31, i32 31, i32 31>, <4 x i32>* [[REG1735:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1755:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1733]], align 16
// CHECK-NEXT: [[REG1756:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1755]] to <4 x i32>
// CHECK-NEXT: [[REG1757:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1735]], align 16
// CHECK-NEXT: [[REG1758:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sra(int vector[4], unsigned int vector[4])(<4 x i32> [[REG1756]], <4 x i32> [[REG1757]])
// CHECK-NEXT: store <4 x i32> [[REG1758]], <4 x i32>* [[REG1759:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1760:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1759]], align 16
// CHECK-NEXT: [[REG1761:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1760]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG1761]]
void__attribute__((noinline))
test_srl(){
resi=_mm_srl_epi16(mi1,mi2);
resi=_mm_srl_epi32(mi1,mi2);
resi=_mm_srl_epi64(mi1,mi2);
resi=_mm_srli_epi16(mi1,i);
resi=_mm_srli_epi32(mi1,i);
resi=_mm_srli_epi64(mi1,i);
resi=_mm_srli_si128(mi1,i);
}
// CHECK-LABEL: @test_srl
// CHECK: define available_externally <2 x i64> @_mm_srl_epi16(<2 x i64> [[REG1762:[0-9a-zA-Z_%.]+]], <2 x i64> [[REG1763:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG1762]], <2 x i64>* [[REG1764:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x i64> [[REG1763]], <2 x i64>* [[REG1765:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, <8 x i16>* [[REG1766:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1767:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1765]], align 16
// CHECK-NEXT: [[REG1768:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1767]] to <8 x i16>
// CHECK-LE-NEXT: [[REG1769:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> [[REG1768]], i32 zeroext 0)
// CHECK-BE-NEXT: [[REG1769:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> [[REG1768]], i32 zeroext 3)
// CHECK-NEXT: store <8 x i16> [[REG1769]], <8 x i16>* [[REG1770:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1771:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1770]], align 16
// CHECK-NEXT: [[REG1772:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG1771]], <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
// CHECK-NEXT: store <8 x i16> [[REG1772]], <8 x i16>* [[REG1773:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1774:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1764]], align 16
// CHECK-NEXT: [[REG1775:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1774]] to <8 x i16>
// CHECK-NEXT: [[REG1776:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1770]], align 16
// CHECK-NEXT: [[REG1777:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sr(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG1775]], <8 x i16> [[REG1776]])
// CHECK-NEXT: store <8 x i16> [[REG1777]], <8 x i16>* [[REG1778:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1779:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1773]], align 16
// CHECK-NEXT: [[REG1780:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1778]], align 16
// CHECK-NEXT: [[REG1781:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1773]], align 16
// CHECK-NEXT: [[REG1782:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sel(unsigned short vector[8], unsigned short vector[8], bool vector[8])(<8 x i16> [[REG1779]], <8 x i16> [[REG1780]], <8 x i16> [[REG1781]])
// CHECK-NEXT: store <8 x i16> [[REG1782]], <8 x i16>* [[REG1778]], align 16
// CHECK-NEXT: [[REG1783:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1778]], align 16
// CHECK-NEXT: [[REG1784:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG1783]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG1784]]
// CHECK: define available_externally <2 x i64> @_mm_srl_epi32(<2 x i64> [[REG1785:[0-9a-zA-Z_%.]+]], <2 x i64> [[REG1786:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG1785]], <2 x i64>* [[REG1787:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x i64> [[REG1786]], <2 x i64>* [[REG1788:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <4 x i32> <i32 32, i32 32, i32 32, i32 32>, <4 x i32>* [[REG1789:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1790:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1788]], align 16
// CHECK-NEXT: [[REG1791:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1790]] to <4 x i32>
// CHECK-LE-NEXT: [[REG1792:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> [[REG1791]], i32 zeroext 0)
// CHECK-BE-NEXT: [[REG1792:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> [[REG1791]], i32 zeroext 1)
// CHECK-NEXT: store <4 x i32> [[REG1792]], <4 x i32>* [[REG1793:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1794:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1793]], align 16
// CHECK-NEXT: [[REG1795:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG1794]], <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
// CHECK-NEXT: store <4 x i32> [[REG1795]], <4 x i32>* [[REG1796:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1797:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1787]], align 16
// CHECK-NEXT: [[REG1798:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1797]] to <4 x i32>
// CHECK-NEXT: [[REG1799:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1793]], align 16
// CHECK-NEXT: [[REG1800:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sr(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG1798]], <4 x i32> [[REG1799]])
// CHECK-NEXT: store <4 x i32> [[REG1800]], <4 x i32>* [[REG1801:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1802:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1796]], align 16
// CHECK-NEXT: [[REG1803:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1801]], align 16
// CHECK-NEXT: [[REG1804:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1796]], align 16
// CHECK-NEXT: [[REG1805:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sel(unsigned int vector[4], unsigned int vector[4], bool vector[4])(<4 x i32> [[REG1802]], <4 x i32> [[REG1803]], <4 x i32> [[REG1804]])
// CHECK-NEXT: store <4 x i32> [[REG1805]], <4 x i32>* [[REG1801]], align 16
// CHECK-NEXT: [[REG1806:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1801]], align 16
// CHECK-NEXT: [[REG1807:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1806]] to <2 x i64>
// CHECK-NEXT: ret <2 x i64> [[REG1807]]
// CHECK: define available_externally <2 x i64> @_mm_srl_epi64(<2 x i64> [[REG1808:[0-9a-zA-Z_%.]+]], <2 x i64> [[REG1809:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG1808]], <2 x i64>* [[REG1810:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x i64> [[REG1809]], <2 x i64>* [[REG1811:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store <2 x i64> <i64 64, i64 64>, <2 x i64>* [[REG1812:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1813:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1811]], align 16
// CHECK-NEXT: [[REG1814:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splat(unsigned long long vector[2], unsigned int)(<2 x i64> [[REG1813]], i32 zeroext 0)
// CHECK-NEXT: store <2 x i64> [[REG1814]], <2 x i64>* [[REG1815:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1816:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1815]], align 16
// CHECK-NEXT: [[REG1817:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> [[REG1816]], <2 x i64> <i64 64, i64 64>)
// CHECK-NEXT: store <2 x i64> [[REG1817]], <2 x i64>* [[REG1818:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1819:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1810]], align 16
// CHECK-NEXT: [[REG1820:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1815]], align 16
// CHECK-NEXT: [[REG1821:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_sr(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> [[REG1819]], <2 x i64> [[REG1820]])
// CHECK-NEXT: store <2 x i64> [[REG1821]], <2 x i64>* [[REG1822:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1823:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1818]], align 16
// CHECK-NEXT: [[REG1824:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1823]] to <2 x double>
// CHECK-NEXT: [[REG1825:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1822]], align 16
// CHECK-NEXT: [[REG1826:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1825]] to <2 x double>
// CHECK-NEXT: [[REG1827:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1818]], align 16
// CHECK-NEXT: [[REG1828:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_sel(double vector[2], double vector[2], bool vector[2])(<2 x double> [[REG1824]], <2 x double> [[REG1826]], <2 x i64> [[REG1827]])
// CHECK-NEXT: [[REG1829:[0-9a-zA-Z_%.]+]] = bitcast <2 x double> [[REG1828]] to <2 x i64>
// CHECK-NEXT: store <2 x i64> [[REG1829]], <2 x i64>* [[REG1822]], align 16
// CHECK-NEXT: [[REG1830:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1822]], align 16
// CHECK-NEXT: ret <2 x i64> [[REG1830]]
// CHECK: define available_externally <2 x i64> @_mm_srli_epi16(<2 x i64> [[REG1831:[0-9a-zA-Z_%.]+]], i32 signext [[REG1832:[0-9a-zA-Z_%.]+]])
// CHECK: store <2 x i64> [[REG1831]], <2 x i64>* [[REG1833:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: store i32 [[REG1832]], i32* [[REG1834:[0-9a-zA-Z_%.]+]], align 4
// CHECK-NEXT: store <8 x i16> zeroinitializer, <8 x i16>* [[REG1835:[0-9a-zA-Z_%.]+]], align 16
// CHECK-NEXT: [[REG1850:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1833]], align 16
// CHECK-NEXT: [[REG1851:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1850]] to <8 x i16>
// CHECK-NEXT: [[REG1852:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1845]], align 16
// CHECK-NEXT: [[REG1853:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sr(short vector[8], unsigned short vector[8])(<8 x i16> [[REG1851]], <8 x i16> [[REG1852]])
// CHECK-NEXT: store <8 x i16> [[REG1853]], <8 x i16>* [[REG1835]], align 16
// CHECK-NEXT: [[REG1881:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1858]], align 16
// CHECK-NEXT: [[REG1882:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1881]] to <4 x i32>
// CHECK-NEXT: [[REG1883:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1874]], align 16
// CHECK-NEXT: [[REG1884:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])(<4 x i32> [[REG1882]], <4 x i32> [[REG1883]])
// CHECK-NEXT: store <4 x i32> [[REG1884]], <4 x i32>* [[REG1860]], align 16
// CHECK-NEXT: [[REG1915:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1889]], align 16
// CHECK-NEXT: [[REG1916:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1906]], align 16
// CHECK-NEXT: [[REG1917:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_sr(long long vector[2], unsigned long long vector[2])(<2 x i64> [[REG1915]], <2 x i64> [[REG1916]])
// CHECK-NEXT: store <2 x i64> [[REG1917]], <2 x i64>* [[REG1891]], align 16