Tests for the OpenCL kernel (not working yet)

This commit is contained in:
Jerome Kieffer 2012-10-12 17:01:14 +02:00
parent 974488ee3a
commit 87426d38c4
2 changed files with 101 additions and 49 deletions

View File

@ -40,9 +40,11 @@
#endif
#ifdef ENABLE_FP64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
// #pragma OPENCL EXTENSION cl_khr_fp64 : enable
typedef double bigfloat_t;
#else
#pragma OPENCL EXTENSION cl_khr_fp64 : disable
// #pragma OPENCL EXTENSION cl_khr_fp64 : disable
typedef float bigfloat_t;
#endif
#define GROUP_SIZE BLOCK_SIZE
@ -56,43 +58,46 @@
* Values of 0 in the mask are processed and values of 1 ignored as per PyFAI
*
* @param weights Float pointer to global memory storing the input image.
* @param do_dummy bint: shall the dummy pixels be checked? Dummy pixels are pixels marked as bad and ignored
* @param binarray UINTType Pointer to global memory with the uweights array.
* @param tth_min_max Float pointer to global memory of size 2 (vector) storing the min and max values
* for 2th +- d2th.
* @param intensity Float pointer to global memory where the input image resides.
* @param histogram UINTType Pointer to global memory with the uhistogram array.
* @param span_range Float pointer to global memory with the max values of spans per group.
* @param mask Int pointer to global memory with the mask to be used.
* @param tth_range Float pointer to global memory of size 2 (vector) storing the min and max for integration.
* If tth range is not specified then this array points to tth_min_max.
* @param bins Unsigned int: number of output bins wanted (and pre-calculated)
* @param lut_size Unsigned int: dimension of the look-up table
* @param lut_idx Unsigned int pointer to an array with the indices of the input pixels
* @param lut_coef Float pointer to an array of coefficients for each input pixel
* @param do_dummy Bool/int: shall the dummy pixels be checked? Dummy pixels are pixels marked as bad and ignored
* @param dummy Float: value for bad pixels
* @param delta_dummy Float: precision for bad pixel value
* @param do_dark Bool/int: shall dark-current correction be applied ?
* @param dark Float pointer to global memory storing the dark image.
* @param do_flat Bool/int: shall flat-field correction be applied ? (could contain polarization corrections)
* @param flat Float pointer to global memory storing the flat image.
* @param outData Float pointer to the output 1D array with the weighted histogram
* @param outCount Float pointer to the output 1D array with the unweighted histogram
* @param outMerge Float pointer to the output 1D array with the diffractogram
*/
__kernel void
lut_integrate( const __global float *weights,
const __global uint bins,
const __global uint lut_size,
uint bins,
uint lut_size,
const __global uint *lut_idx,
const __global float *lut_coef,
const int do_dummy,
const float dummy,
const float delta_dummy,
const int do_dark,
int do_dummy,
float dummy,
float delta_dummy,
int do_dark,
const __global float *dark,
const int do_flat,
int do_flat,
const __global float *flat,
__global double *outData,
__global double *outCount,
__global double *outMerge
__global float *outData,
__global float *outCount,
__global float *outMerge
)
{
uint k, j, i= get_global_id(0);
int idx
double sum_data = 0.0;
double sum_count = 0.0;
const double epsilon = 1e-10
float coef, data
if(gid < bins)
uint idx, k, j, i= get_global_id(0);
bigfloat_t sum_data = 0.0;
bigfloat_t sum_count = 0.0;
const bigfloat_t epsilon = 1e-10;
float coef, data;
if(i < bins)
{
for (j=0;j<lut_size;j++)
{
@ -101,23 +106,23 @@ lut_integrate( const __global float *weights,
coef = lut_coef[k];
if((idx <= 0) && (coef <= 0.0))
break;
data = weight[idx];
data = weights[idx];
if( (!do_dummy) || (delta_dummy && (fabs(data-dummy) > delta_dummy))|| (data!=dummy) )
{
if(do_dark)
data -= dark[idx];
if do_flat:
if(do_flat)
data /= flat[idx];
sum_data += coef * data;
sum_count += coef;
}//test dummy
}//for j
outData[i] = sum_data;
outCount[i] = sum_count;
};//test dummy
};//for j
outData[i] = (float) sum_data;
outCount[i] = (float) sum_count;
if (sum_count > epsilon)
outMerge[i] = sum_data / sum_count;
}//if bins
}//end kernel
outMerge[i] = (float) sum_data / sum_count;
};//if bins
};//end kernel

View File

@ -1,10 +1,11 @@
#!/usr/bin/python
import os, time
import os, time, numpy
import pyFAI, fabio
root = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "test", "testimages")
spline = os.path.join(root, "halfccd.spline")
poni = os.path.join(root, "LaB6.poni")
bins = 2048
res = []
with open(poni, "r") as f:
for l in f:
@ -18,14 +19,14 @@ edf = os.path.join(root, "LaB6_0020.edf")
img = fabio.open(edf)
ai = pyFAI.load(poni)
ai.xrpd(img.data, 2048)
ai.xrpd(img.data, bins)
tth = ai._ttha.ravel().astype("float32")
dtth = ai._dttha.ravel().astype("float32")
data = img.data.ravel().astype("float32")
import splitBBox
t0 = time.time()
ra, rb, rc, rd = splitBBox.histoBBox1d(data, tth, dtth, bins=2048)
ra, rb, rc, rd = splitBBox.histoBBox1d(data, tth, dtth, bins=bins)
t1 = time.time()
ref_time = t1 - t0
print("ref time: %.3fs" % ref_time)
@ -43,7 +44,7 @@ import splitBBoxLUT
#a, b, c, d, ee = splitBBoxLUT.histoBBox1d(data, tth, dtth, bins=2048)
#print "LUT max =", ee.max()
t0 = time.time()
integ = splitBBoxLUT.HistoBBox1d(tth, dtth, bins=2048)
integ = splitBBoxLUT.HistoBBox1d(tth, dtth, bins=bins)
t1 = time.time()
a, b, c, d = integ.integrate(data)
t2 = time.time()
@ -55,7 +56,53 @@ t2 = time.time()
print "speed-up:", ref_time / (t2 - t1)
from pylab import *
#plot(ee)
plot(a, b)
plot(ra, rb)
plot(a, b, label="LUT")
plot(ra, rb, label="Original")
import pyopencl
mf = pyopencl.mem_flags
ctx = pyopencl.create_some_context()
q = pyopencl.CommandQueue(ctx)
program = pyopencl.Program(ctx, open("../openCL/ocl_azim_LUT.cl").read()).build()
t3 = time.time()
weights_buf = pyopencl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=data)
lut_idx_buf = pyopencl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=integ.lut_idx)
lut_coef_buf = pyopencl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=integ.lut_coef)
None_buf = pyopencl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=numpy.zeros(1, dtype=numpy.float32))
outData_buf = pyopencl.Buffer(ctx, mf.WRITE_ONLY, 4 * bins)
outCount_buf = pyopencl.Buffer(ctx, mf.WRITE_ONLY, 4 * bins)
outMerge_buf = pyopencl.Buffer(ctx, mf.WRITE_ONLY, 4 * bins)
print program.all_kernels()
kernel = program.all_kernels()[0]
program.lut_integrate(q, None, None,
weights_buf,
2048,
integ.lut_size,
lut_idx_buf,
lut_coef_buf,
0,
0,
0,
0,
None_buf,
0,
None_buf,
outData_buf,
outCount_buf,
outMerge_buf)
b = numpy.empty(bins, dtype=numpy.float32)
c = numpy.empty(bins, dtype=numpy.float32)
d = numpy.empty(bins, dtype=numpy.float32)
pyopencl.enqueue_read_buffer(q, outData_buf, c).wait()
pyopencl.enqueue_read_buffer(q, outCount_buf, d).wait()
pyopencl.enqueue_read_buffer(q, outMerge_buf, b).wait()
t4 = time.time()
print "speed-up:", ref_time / (t4 - t3)
from pylab import *
#plot(ee)
plot(a, b, label="OpenCL")
show()
raw_input("Enter")