implement v?Sqr as y =__mm256_mul_pd(a, a)
This commit is contained in:
parent
e31e919a96
commit
5ef9a2fdc9
|
@ -46,7 +46,8 @@ OPENVML_EXPORT void OpenVML_FUNCNAME(vdSub)(const VML_INT n, const double * a, c
|
|||
OPENVML_EXPORT void OpenVML_FUNCNAME(vcSub)(const VML_INT n, const float * a, const float * b, float * y);
|
||||
OPENVML_EXPORT void OpenVML_FUNCNAME(vzSub)(const VML_INT n, const double * a, const double * b, double * y);
|
||||
|
||||
OPENVML_EXPORT void OpenVML_FUNCNAME(vsSqr)(const VML_INT n, const double * a, double * y);
|
||||
OPENVML_EXPORT void OpenVML_FUNCNAME(vsSqr)(const VML_INT n, const float * a, float * y);
|
||||
OPENVML_EXPORT void OpenVML_FUNCNAME(vdSqr)(const VML_INT n, const double * a, double * y);
|
||||
|
||||
OPENVML_EXPORT void OpenVML_FUNCNAME(vsPow)(const VML_INT n, const float * a, const float * b, float * y);
|
||||
OPENVML_EXPORT void OpenVML_FUNCNAME(vdPow)(const VML_INT n, const double * a, const double * b, double * y);
|
||||
|
|
|
@ -42,6 +42,7 @@ void OpenVML_FUNCNAME(csub_k)(VMLLONG n, float * a, float * b, float * y, float
|
|||
void OpenVML_FUNCNAME(zsub_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params);
|
||||
|
||||
void OpenVML_FUNCNAME(ssqr_k)(VMLLONG n, float * a, float * b, float * y, float * z, float * other_params);
|
||||
void OpenVML_FUNCNAME(sdqr_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params);
|
||||
|
||||
void OpenVML_FUNCNAME(spow_k)(VMLLONG n, float * a, float * b, float * y, float * z, float * other_params);
|
||||
void OpenVML_FUNCNAME(dpow_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params);
|
||||
|
|
|
@ -43,7 +43,8 @@
|
|||
#define CSUB_K OpenVML_FUNCNAME(csub_k)
|
||||
#define ZSUB_K OpenVML_FUNCNAME(zsub_k)
|
||||
|
||||
#define ZSQR_K OpenVML_FUNCNAME(ssqr_k)
|
||||
#define SSQR_K OpenVML_FUNCNAME(ssqr_k)
|
||||
#define DSQR_K OpenVML_FUNCNAME(dsqr_k)
|
||||
|
||||
#define SPOW_K OpenVML_FUNCNAME(spow_k)
|
||||
#define DPOW_K OpenVML_FUNCNAME(dpow_k)
|
||||
|
@ -116,6 +117,7 @@
|
|||
#ifndef DOUBLE
|
||||
#define ADD_K SADD_K
|
||||
#define SUB_K SSUB_K
|
||||
#define SQR_K SSQR_K
|
||||
#define POW_K SPOW_K
|
||||
#define POWX_K SPOWX_K
|
||||
#define EXP_K SEXP_K
|
||||
|
@ -136,6 +138,7 @@
|
|||
#else
|
||||
#define ADD_K DADD_K
|
||||
#define SUB_K DSUB_K
|
||||
#define SQR_K DSQR_K
|
||||
#define POW_K DPOW_K
|
||||
#define POWX_K DPOWX_K
|
||||
#define EXP_K DEXP_K
|
||||
|
|
|
@ -47,6 +47,7 @@ OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vcSub)(const VML_INT n, const float * a
|
|||
OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vzSub)(const VML_INT n, const double * a, const double * b, double * y);
|
||||
|
||||
OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vsSqr)(const VML_INT n, const float * a, float * y);
|
||||
OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vdSqr)(const VML_INT n, const double * a, double * y);
|
||||
|
||||
OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vsPow)(const VML_INT n, const float * a, const float * b, float * y);
|
||||
OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vdPow)(const VML_INT n, const double * a, const double * b, double * y);
|
||||
|
|
|
@ -6,7 +6,7 @@ set(OpenVML_LIBSRC_C "")
|
|||
set(OpenVML_LIBSRC_Z "")
|
||||
|
||||
set(REAL_INTERFACE_LIST
|
||||
add sub
|
||||
add sub sqr
|
||||
pow powx pow2o3 pow3o2 exp expm1
|
||||
tanh
|
||||
log10 ln log1p
|
||||
|
@ -108,4 +108,4 @@ Endforeach(INTERFACE)
|
|||
|
||||
add_library(openvml_interface_core OBJECT ${OpenVML_LIBSRC_S} ${OpenVML_LIBSRC_D} ${OpenVML_LIBSRC_C} ${OpenVML_LIBSRC_Z} ${OpenVML_LIBSRC_OTHER})
|
||||
|
||||
target_compile_definitions(openvml_interface_core PUBLIC openvml_EXPORTS)
|
||||
target_compile_definitions(openvml_interface_core PUBLIC openvml_EXPORTS)
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
/* * Copyright (c) 2014, 2015 Zhang Xianyi
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this
|
||||
* list of conditions and the following disclaimer in the documentation and/or
|
||||
* other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <openvml.h>
|
||||
#include <openvml_driver.h>
|
||||
#include <openvml_kernel.h>
|
||||
|
||||
|
||||
void CNAME(const VML_INT n, const VML_FLOAT * a, VML_FLOAT * y) {
|
||||
|
||||
if (n<=0) return;
|
||||
if (a==NULL || y==NULL) return;
|
||||
|
||||
|
||||
EXEC_VML(0, SQR_K, n, (VML_FLOAT*)a, NULL, y, NULL, NULL);
|
||||
|
||||
}
|
|
@ -7,7 +7,7 @@ set(OpenVML_LIBSRC_C "")
|
|||
set(OpenVML_LIBSRC_Z "")
|
||||
|
||||
#s,d
|
||||
set(KERNEL_LIST add sub pow powx exp expm1 tanh log10 ln log1p floor
|
||||
set(KERNEL_LIST add sub sqr pow powx exp expm1 tanh log10 ln log1p floor
|
||||
sin cos sincos tan asin acos atan atan2)
|
||||
|
||||
#c,z
|
||||
|
|
|
@ -8,6 +8,9 @@ set(sub_D_KERNEL_SOURCE generic/sub_kernel.c)
|
|||
set(sub_C_KERNEL_SOURCE generic/sub_kernel.c)
|
||||
set(sub_Z_KERNEL_SOURCE generic/sub_kernel.c)
|
||||
|
||||
set(sqr_S_KERNEL_SOURCE generic/sqr_kernel.c)
|
||||
set(sqr_D_KERNEL_SOURCE generic/sqr_kernel.c)
|
||||
|
||||
set(pow_S_KERNEL_SOURCE generic/pow_kernel.c)
|
||||
set(pow_D_KERNEL_SOURCE generic/pow_kernel.c)
|
||||
|
||||
|
|
|
@ -8,6 +8,9 @@ set(sub_D_KERNEL_SOURCE generic/sub_kernel.c)
|
|||
set(sub_C_KERNEL_SOURCE generic/sub_kernel.c)
|
||||
set(sub_Z_KERNEL_SOURCE generic/sub_kernel.c)
|
||||
|
||||
set(sqr_S_KERNEL_SOURCE generic/sqr_kernel.c)
|
||||
set(sqr_D_KERNEL_SOURCE generic/sqr_kernel.c)
|
||||
|
||||
set(pow_S_KERNEL_SOURCE generic/pow_kernel.c)
|
||||
set(pow_D_KERNEL_SOURCE generic/pow_kernel.c)
|
||||
|
||||
|
|
|
@ -8,6 +8,9 @@ set(sub_D_KERNEL_SOURCE ${OpenVML_ARCH}/sub_kernel.c)
|
|||
set(sub_C_KERNEL_SOURCE ${OpenVML_ARCH}/sub_kernel.c)
|
||||
set(sub_Z_KERNEL_SOURCE ${OpenVML_ARCH}/sub_kernel.c)
|
||||
|
||||
set(sqr_S_KERNEL_SOURCE ${OpenVML_ARCH}/sqr_kernel.c)
|
||||
set(sqr_D_KERNEL_SOURCE ${OpenVML_ARCH}/sqr_kernel.c)
|
||||
|
||||
set(pow_S_KERNEL_SOURCE ${OpenVML_ARCH}/pow_kernel.c)
|
||||
set(pow_D_KERNEL_SOURCE ${OpenVML_ARCH}/pow_kernel.c)
|
||||
#set(pow_C_KERNEL_SOURCE ${OpenVML_ARCH}/pow_kernel.c)
|
||||
|
|
|
@ -8,6 +8,9 @@ set(sub_D_KERNEL_SOURCE generic/sub_kernel.c)
|
|||
set(sub_C_KERNEL_SOURCE generic/sub_kernel.c)
|
||||
set(sub_Z_KERNEL_SOURCE generic/sub_kernel.c)
|
||||
|
||||
set(sqr_S_KERNEL_SOURCE generic/sqr_kernel.c)
|
||||
set(sqr_D_KERNEL_SOURCE generic/sqr_kernel.c)
|
||||
|
||||
set(pow_S_KERNEL_SOURCE generic/pow_kernel.c)
|
||||
set(pow_D_KERNEL_SOURCE generic/pow_kernel.c)
|
||||
|
||||
|
|
|
@ -16,6 +16,9 @@ set(sub_D_KERNEL_SOURCE ${OpenVML_ARCH}/dsub_kernel_avx.c)
|
|||
set(sub_C_KERNEL_SOURCE ${OpenVML_ARCH}/ssub_kernel_avx.c)
|
||||
set(sub_Z_KERNEL_SOURCE ${OpenVML_ARCH}/dsub_kernel_avx.c)
|
||||
|
||||
set(sqr_S_KERNEL_SOURCE ${OpenVML_ARCH}/ssqr_kernel_avx.c)
|
||||
set(sqr_D_KERNEL_SOURCE ${OpenVML_ARCH}/dsqr_kernel_avx.c)
|
||||
|
||||
set(pow_S_KERNEL_SOURCE ${OpenVML_ARCH}/spow_kernel_avx.c)
|
||||
set(pow_D_KERNEL_SOURCE ${OpenVML_ARCH}/dpow_kernel_avx.c)
|
||||
|
||||
|
|
|
@ -16,6 +16,9 @@ set(sub_D_KERNEL_SOURCE ${OpenVML_ARCH}/dsub_kernel_avx.c)
|
|||
set(sub_C_KERNEL_SOURCE ${OpenVML_ARCH}/ssub_kernel_avx.c)
|
||||
set(sub_Z_KERNEL_SOURCE ${OpenVML_ARCH}/dsub_kernel_avx.c)
|
||||
|
||||
set(sqr_S_KERNEL_SOURCE ${OpenVML_ARCH}/ssqr_kernel_avx.c)
|
||||
set(sqr_D_KERNEL_SOURCE ${OpenVML_ARCH}/dsqr_kernel_avx.c)
|
||||
|
||||
set(pow_S_KERNEL_SOURCE ${OpenVML_ARCH}/spow_kernel_avx.c)
|
||||
set(pow_D_KERNEL_SOURCE ${OpenVML_ARCH}/dpow_kernel_avx.c)
|
||||
|
||||
|
|
|
@ -0,0 +1,78 @@
|
|||
/* * Copyright (c) 2014, 2015 Zhang Xianyi
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this
|
||||
* list of conditions and the following disclaimer in the documentation and/or
|
||||
* other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "openvml_kernel.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
void KERNEL_NAME(VMLLONG n, VML_FLOAT * a, VML_FLOAT * b, VML_FLOAT * y, VML_FLOAT * z, VML_FLOAT * other_params) {
|
||||
VMLLONG loop_count=(COMPSIZE*n) >> 5;
|
||||
VMLLONG remain_count=(COMPSIZE*n) & 0x1f;
|
||||
|
||||
int i=0;
|
||||
|
||||
while(loop_count>0){
|
||||
|
||||
__m256d av0=_mm256_loadu_pd(a);
|
||||
__m256d av1=_mm256_loadu_pd(a+4);
|
||||
__m256d av2=_mm256_loadu_pd(a+8);
|
||||
__m256d av3=_mm256_loadu_pd(a+12);
|
||||
|
||||
__m256d av4=_mm256_loadu_pd(a+16);
|
||||
__m256d av5=_mm256_loadu_pd(a+20);
|
||||
__m256d av6=_mm256_loadu_pd(a+24);
|
||||
__m256d av7=_mm256_loadu_pd(a+28);
|
||||
|
||||
|
||||
__m256d yv0=_mm256_mul_pd(av0, av0);
|
||||
__m256d yv1=_mm256_mul_pd(av1, av1);
|
||||
__m256d yv2=_mm256_mul_pd(av2, av2);
|
||||
__m256d yv3=_mm256_mul_pd(av3, av3);
|
||||
|
||||
__m256d yv4=_mm256_mul_pd(av4, av4);
|
||||
__m256d yv5=_mm256_mul_pd(av5, av5);
|
||||
__m256d yv6=_mm256_mul_pd(av6, av6);
|
||||
__m256d yv7=_mm256_mul_pd(av7, av7);
|
||||
|
||||
_mm256_storeu_pd(y, yv0);
|
||||
_mm256_storeu_pd(y+4, yv1);
|
||||
_mm256_storeu_pd(y+8, yv2);
|
||||
_mm256_storeu_pd(y+12, yv3);
|
||||
|
||||
_mm256_storeu_pd(y+16, yv4);
|
||||
_mm256_storeu_pd(y+20, yv5);
|
||||
_mm256_storeu_pd(y+24, yv6);
|
||||
_mm256_storeu_pd(y+28, yv7);
|
||||
|
||||
a+=32;
|
||||
y+=32;
|
||||
loop_count--;
|
||||
}
|
||||
|
||||
for(i=0; i<remain_count; i++){
|
||||
y[i]=a[i]*a[i];
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
/* * Copyright (c) 2014, 2015 Zhang Xianyi
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this
|
||||
* list of conditions and the following disclaimer in the documentation and/or
|
||||
* other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "openvml_kernel.h"
|
||||
|
||||
void KERNEL_NAME(VMLLONG n, VML_FLOAT * a, VML_FLOAT * b, VML_FLOAT * y, VML_FLOAT * z, VML_FLOAT * other_params) {
|
||||
VMLLONG i=0;
|
||||
for(i=0; i<COMPSIZE*n; i++){
|
||||
y[i]=a[i]*a[i];
|
||||
}
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
/* * Copyright (c) 2014, 2015 Zhang Xianyi
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this
|
||||
* list of conditions and the following disclaimer in the documentation and/or
|
||||
* other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "openvml_kernel.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
void KERNEL_NAME(VMLLONG n, VML_FLOAT * a, VML_FLOAT * b, VML_FLOAT * y, VML_FLOAT * z, VML_FLOAT * other_params) {
|
||||
VMLLONG loop_count=(COMPSIZE*n) >> 5;
|
||||
VMLLONG remain_count=(COMPSIZE*n) & 0x1f;
|
||||
|
||||
int i=0;
|
||||
|
||||
while(loop_count>0){
|
||||
|
||||
__m256 av0=_mm256_loadu_ps(a);
|
||||
__m256 av1=_mm256_loadu_ps(a+8);
|
||||
__m256 av2=_mm256_loadu_ps(a+16);
|
||||
__m256 av3=_mm256_loadu_ps(a+24);
|
||||
|
||||
|
||||
__m256 yv0=_mm256_mul_ps(av0, av0);
|
||||
__m256 yv1=_mm256_mul_ps(av1, av1);
|
||||
__m256 yv2=_mm256_mul_ps(av2, av2);
|
||||
__m256 yv3=_mm256_mul_ps(av3, av3);
|
||||
|
||||
|
||||
_mm256_storeu_ps(y, yv0);
|
||||
_mm256_storeu_ps(y+8, yv1);
|
||||
_mm256_storeu_ps(y+16, yv2);
|
||||
_mm256_storeu_ps(y+24, yv3);
|
||||
|
||||
a+=32;
|
||||
y+=32;
|
||||
loop_count--;
|
||||
}
|
||||
|
||||
for(i=0; i<remain_count; i++){
|
||||
y[i]=a[i]*a[i];
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
set(OpenVML_REF_SRC
|
||||
vadd.c
|
||||
vsub.c
|
||||
vsqr.c
|
||||
vpow.c
|
||||
vpowx.c
|
||||
vpow2o3.c
|
||||
|
@ -27,4 +28,4 @@ if(NOT MSVC)
|
|||
target_link_libraries(${OpenVML_LIBNAME}_ref m)
|
||||
endif()
|
||||
|
||||
target_compile_definitions(${OpenVML_LIBNAME}_ref PUBLIC openvml_EXPORTS)
|
||||
target_compile_definitions(${OpenVML_LIBNAME}_ref PUBLIC openvml_EXPORTS)
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
/* * Copyright (c) 2014, 2015 Zhang Xianyi
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this
|
||||
* list of conditions and the following disclaimer in the documentation and/or
|
||||
* other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <openvml_reference.h>
|
||||
|
||||
void OpenVML_FUNCNAME_REF(vsSqr)(const VML_INT n, const float * a, float * y){
|
||||
VML_INT i;
|
||||
if (n<=0) return;
|
||||
if (a==NULL || y==NULL) return;
|
||||
|
||||
for(i=0; i<n; i++){
|
||||
y[i]=a[i]*a[i];
|
||||
}
|
||||
}
|
||||
|
||||
void OpenVML_FUNCNAME_REF(vdSqr)(const VML_INT n, const double * a, double * y){
|
||||
VML_INT i;
|
||||
if (n<=0) return;
|
||||
if (a==NULL || y==NULL) return;
|
||||
|
||||
for(i=0; i<n; i++){
|
||||
y[i]=a[i]*a[i];
|
||||
}
|
||||
}
|
||||
|
|
@ -28,21 +28,21 @@
|
|||
#include <string.h>
|
||||
#include <openvml_reference.h>
|
||||
|
||||
static char* funcname[4]={"vsSqr", "vdSqr", "vcSqr","vzSqr"};
|
||||
static double flop_per_elem[4]={1.0, 1.0, 2.0, 2.0};
|
||||
static char* funcname[4]={"vsSqr", "vdSqr", NULL, NULL};
|
||||
static double flop_per_elem[4]={1.0, 1.0, 0, 0};
|
||||
|
||||
static a_y_func_t ref_vsqr[] = {
|
||||
(a_y_func_t)OpenVML_FUNCNAME_REF(vsSqr),
|
||||
(a_y_func_t)OpenVML_FUNCNAME_REF(vdSqr),
|
||||
(a_y_func_t)OpenVML_FUNCNAME_REF(vcSqr),
|
||||
(a_y_func_t)OpenVML_FUNCNAME_REF(vzSqr),
|
||||
NULL,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static a_y_func_t test_vsqr[] = {
|
||||
(a_y_func_t)OpenVML_FUNCNAME(vsSqr),
|
||||
(a_y_func_t)OpenVML_FUNCNAME(vdSqr),
|
||||
(a_y_func_t)OpenVML_FUNCNAME(vcSqr),
|
||||
(a_y_func_t)OpenVML_FUNCNAME(vzSqr),
|
||||
NULL,
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
||||
|
@ -54,10 +54,3 @@ CTEST2(check_result_d, sqr){
|
|||
run_test_a_y(data->parameter, funcname, test_vsqr, ref_vsqr, flop_per_elem);
|
||||
}
|
||||
|
||||
CTEST2(check_result_c, sqr){
|
||||
run_test_a_y(data->parameter, funcname, test_vsqr, ref_vsqr, flop_per_elem);
|
||||
}
|
||||
|
||||
CTEST2(check_result_z, sqr){
|
||||
run_test_a_y(data->parameter, funcname, test_vsqr, ref_vsqr, flop_per_elem);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue