mirror of https://github.com/silx-kit/pyFAI.git
tests for OpenCL kernel (non working)
This commit is contained in:
parent
974488ee3a
commit
87426d38c4
|
@ -40,9 +40,11 @@
|
|||
#endif
|
||||
|
||||
#ifdef ENABLE_FP64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
// #pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
typedef double bigfloat_t;
|
||||
#else
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : disable
|
||||
// #pragma OPENCL EXTENSION cl_khr_fp64 : disable
|
||||
typedef float bigfloat_t;
|
||||
#endif
|
||||
|
||||
#define GROUP_SIZE BLOCK_SIZE
|
||||
|
@ -56,43 +58,46 @@
|
|||
* Values of 0 in the mask are processed and values of 1 ignored as per PyFAI
|
||||
*
|
||||
* @param weights Float pointer to global memory storing the input image.
|
||||
* @param do_dummy bint: shall the dummy pixels be checked. Dummy pixels are pixels marked as bad and ignored
|
||||
* @param binarray UINTType Pointer to global memory with the uweights array.
|
||||
* @param tth_min_max Float pointer to global memory of size 2 (vector) storing the min and max values
|
||||
* for 2th +- d2th.
|
||||
* @param intensity Float pointer to global memory where the input image resides.
|
||||
* @param histogram UINTType Pointer to global memory with the uhistogram array.
|
||||
* @param span_range Float pointer to global memory with the max values of spans per group.
|
||||
* @param mask Int pointer to global memory with the mask to be used.
|
||||
* @param tth_range Float pointer to global memory of size 2 (vector) storing the min and max for integration.
|
||||
* If tth range is not specified then this array points to tth_min_max.
|
||||
* @param bins Unsigned int: number of output bins wanted (and pre-calculated)
|
||||
* @param lut_size Unsigned int: dimension of the look-up table
|
||||
* @param lut_idx Unsigned integer pointer to an array with the indices of the input pixels
|
||||
* @param lut_coef Float pointer to an array of coefficients for each input pixel
|
||||
* @param do_dummy Bool/int: shall the dummy pixels be checked. Dummy pixels are pixels marked as bad and ignored
|
||||
* @param dummy Float: value for bad pixels
|
||||
* @param delta_dummy Float: precision for bad pixel value
|
||||
* @param do_dark Bool/int: shall dark-current correction be applied ?
|
||||
* @param dark Float pointer to global memory storing the dark image.
|
||||
* @param do_flat Bool/int: shall flat-field correction be applied ? (could contain polarization corrections)
|
||||
* @param flat Float pointer to global memory storing the flat image.
|
||||
* @param outData Float pointer to the output 1D array with the weighted histogram
|
||||
* @param outCount Float pointer to the output 1D array with the unweighted histogram
|
||||
* @param outMerged Float pointer to the output 1D array with the diffractogram
|
||||
|
||||
*/
|
||||
__kernel void
|
||||
lut_integrate( const __global float *weights,
|
||||
const __global uint bins,
|
||||
const __global uint lut_size,
|
||||
uint bins,
|
||||
uint lut_size,
|
||||
const __global uint *lut_idx,
|
||||
const __global float *lut_coef,
|
||||
const int do_dummy,
|
||||
const float dummy,
|
||||
const float delta_dummy,
|
||||
const int do_dark,
|
||||
int do_dummy,
|
||||
float dummy,
|
||||
float delta_dummy,
|
||||
int do_dark,
|
||||
const __global float *dark,
|
||||
const int do_flat,
|
||||
int do_flat,
|
||||
const __global float *flat,
|
||||
__global double *outData,
|
||||
__global double *outCount,
|
||||
__global double *outMerge
|
||||
__global float *outData,
|
||||
__global float *outCount,
|
||||
__global float *outMerge
|
||||
)
|
||||
{
|
||||
|
||||
uint k, j, i= get_global_id(0);
|
||||
int idx
|
||||
double sum_data = 0.0;
|
||||
double sum_count = 0.0;
|
||||
const double epsilon = 1e-10
|
||||
float coef, data
|
||||
if(gid < bins)
|
||||
uint idx, k, j, i= get_global_id(0);
|
||||
bigfloat_t sum_data = 0.0;
|
||||
bigfloat_t sum_count = 0.0;
|
||||
const bigfloat_t epsilon = 1e-10;
|
||||
float coef, data;
|
||||
if(i < bins)
|
||||
{
|
||||
for (j=0;j<lut_size;j++)
|
||||
{
|
||||
|
@ -101,23 +106,23 @@ lut_integrate( const __global float *weights,
|
|||
coef = lut_coef[k];
|
||||
if((idx <= 0) && (coef <= 0.0))
|
||||
break;
|
||||
data = weight[idx];
|
||||
data = weights[idx];
|
||||
if( (!do_dummy) || (delta_dummy && (fabs(data-dummy) > delta_dummy))|| (data!=dummy) )
|
||||
{
|
||||
if(do_dark)
|
||||
data -= dark[idx];
|
||||
if do_flat:
|
||||
if(do_flat)
|
||||
data /= flat[idx];
|
||||
|
||||
sum_data += coef * data;
|
||||
sum_count += coef;
|
||||
|
||||
}//test dummy
|
||||
}//for j
|
||||
outData[i] = sum_data;
|
||||
outCount[i] = sum_count;
|
||||
};//test dummy
|
||||
};//for j
|
||||
outData[i] = (float) sum_data;
|
||||
outCount[i] = (float) sum_count;
|
||||
if (sum_count > epsilon)
|
||||
outMerge[i] = sum_data / sum_count;
|
||||
}//if bins
|
||||
}//end kernel
|
||||
outMerge[i] = (float) sum_data / sum_count;
|
||||
};//if bins
|
||||
};//end kernel
|
||||
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
#!/usr/bin/python
|
||||
import os, time
|
||||
import os, time, numpy
|
||||
import pyFAI, fabio
|
||||
|
||||
root = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "test", "testimages")
|
||||
spline = os.path.join(root, "halfccd.spline")
|
||||
poni = os.path.join(root, "LaB6.poni")
|
||||
bins = 2048
|
||||
res = []
|
||||
with open(poni, "r") as f:
|
||||
for l in f:
|
||||
|
@ -18,14 +19,14 @@ edf = os.path.join(root, "LaB6_0020.edf")
|
|||
|
||||
img = fabio.open(edf)
|
||||
ai = pyFAI.load(poni)
|
||||
ai.xrpd(img.data, 2048)
|
||||
ai.xrpd(img.data, bins)
|
||||
tth = ai._ttha.ravel().astype("float32")
|
||||
dtth = ai._dttha.ravel().astype("float32")
|
||||
data = img.data.ravel().astype("float32")
|
||||
|
||||
import splitBBox
|
||||
t0 = time.time()
|
||||
ra, rb, rc, rd = splitBBox.histoBBox1d(data, tth, dtth, bins=2048)
|
||||
ra, rb, rc, rd = splitBBox.histoBBox1d(data, tth, dtth, bins=bins)
|
||||
t1 = time.time()
|
||||
ref_time = t1 - t0
|
||||
print("ref time: %.3fs" % ref_time)
|
||||
|
@ -43,7 +44,7 @@ import splitBBoxLUT
|
|||
#a, b, c, d, ee = splitBBoxLUT.histoBBox1d(data, tth, dtth, bins=2048)
|
||||
#print "LUT max =", ee.max()
|
||||
t0 = time.time()
|
||||
integ = splitBBoxLUT.HistoBBox1d(tth, dtth, bins=2048)
|
||||
integ = splitBBoxLUT.HistoBBox1d(tth, dtth, bins=bins)
|
||||
t1 = time.time()
|
||||
a, b, c, d = integ.integrate(data)
|
||||
t2 = time.time()
|
||||
|
@ -55,7 +56,53 @@ t2 = time.time()
|
|||
print "speed-up:", ref_time / (t2 - t1)
|
||||
from pylab import *
|
||||
#plot(ee)
|
||||
plot(a, b)
|
||||
plot(ra, rb)
|
||||
plot(a, b, label="LUT")
|
||||
plot(ra, rb, label="Original")
|
||||
|
||||
import pyopencl
|
||||
|
||||
mf = pyopencl.mem_flags
|
||||
ctx = pyopencl.create_some_context()
|
||||
q = pyopencl.CommandQueue(ctx)
|
||||
program = pyopencl.Program(ctx, open("../openCL/ocl_azim_LUT.cl").read()).build()
|
||||
t3 = time.time()
|
||||
weights_buf = pyopencl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=data)
|
||||
lut_idx_buf = pyopencl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=integ.lut_idx)
|
||||
lut_coef_buf = pyopencl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=integ.lut_coef)
|
||||
None_buf = pyopencl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=numpy.zeros(1, dtype=numpy.float32))
|
||||
outData_buf = pyopencl.Buffer(ctx, mf.WRITE_ONLY, 4 * bins)
|
||||
outCount_buf = pyopencl.Buffer(ctx, mf.WRITE_ONLY, 4 * bins)
|
||||
outMerge_buf = pyopencl.Buffer(ctx, mf.WRITE_ONLY, 4 * bins)
|
||||
print program.all_kernels()
|
||||
kernel = program.all_kernels()[0]
|
||||
|
||||
program.lut_integrate(q, None, None,
|
||||
weights_buf,
|
||||
2048,
|
||||
integ.lut_size,
|
||||
lut_idx_buf,
|
||||
lut_coef_buf,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
None_buf,
|
||||
0,
|
||||
None_buf,
|
||||
outData_buf,
|
||||
outCount_buf,
|
||||
outMerge_buf)
|
||||
b = numpy.empty(bins, dtype=numpy.float32)
|
||||
c = numpy.empty(bins, dtype=numpy.float32)
|
||||
d = numpy.empty(bins, dtype=numpy.float32)
|
||||
pyopencl.enqueue_read_buffer(q, outData_buf, c).wait()
|
||||
pyopencl.enqueue_read_buffer(q, outCount_buf, d).wait()
|
||||
pyopencl.enqueue_read_buffer(q, outMerge_buf, b).wait()
|
||||
t4 = time.time()
|
||||
print "speed-up:", ref_time / (t4 - t3)
|
||||
from pylab import *
|
||||
#plot(ee)
|
||||
plot(a, b, label="OpenCL")
|
||||
|
||||
show()
|
||||
raw_input("Enter")
|
||||
|
|
Loading…
Reference in New Issue