Tests for the OpenCL kernel (not working yet)

2012-10-12 17:01:14 +02:00 · 2012-10-12 17:01:14 +02:00 · 87426d38c4
parent 974488ee3a
commit 87426d38c4
2 changed files with 101 additions and 49 deletions
--- a/openCL/ocl_azim_LUT.cl
+++ b/openCL/ocl_azim_LUT.cl
@ -40,9 +40,11 @@
 #endif

 #ifdef ENABLE_FP64
-  #pragma OPENCL EXTENSION cl_khr_fp64 : enable
+//	#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+	typedef double bigfloat_t;
 #else
-  #pragma OPENCL EXTENSION cl_khr_fp64 : disable
+//	#pragma OPENCL EXTENSION cl_khr_fp64 : disable
+	typedef float bigfloat_t;
 #endif

 #define GROUP_SIZE BLOCK_SIZE
@ -56,43 +58,46 @@
 * Values of 0 in the mask are processed and values of 1 ignored as per PyFAI
 *
 * @param weights     Float pointer to global memory storing the input image.
- * @param do_dummy    bint: shall the dummy pixel be checked. Dummy pixel are pixels marked as bad and ignored
- * @param binarray    UINTType Pointer to global memory with the uweights array.
- * @param tth_min_max Float pointer to global memory of size 2 (vector) storing the min and max values
- *                     for 2th +- d2th.
- * @param intensity   Float pointer to global memory where the input image resides.
- * @param histogram   UINTType Pointer to global memory with the uhistogram array.
- * @param span_range  Float pointer to global memory with the max values of spans per group.
- * @param mask        Int pointer to global memory with the mask to be used.
- * @param tth_range   Float pointer to global memory of size 2 (vector) storing the min and max for integration.
- *                     If tth range is not specified the this array points to tth_min_max.
+ * @param bins        Unsigned int: number of output bins wanted (and pre-calculated)
+ * @param lut_size    Unsigned int: dimension of the look-up table
+ * @param lut_idx     Unsigned integer pointer to an array with the indices of the input pixels
+ * @param lut_coef    Float pointer to an array of coefficients for each input pixel
+ * @param do_dummy    Bool/int: shall dummy pixels be checked? Dummy pixels are pixels marked as bad and ignored
+ * @param dummy       Float: value for bad pixels
+ * @param delta_dummy Float: precision for bad pixel value
+ * @param do_dark     Bool/int: shall dark-current correction be applied ?
+ * @param dark        Float pointer to global memory storing the dark image.
+ * @param do_flat     Bool/int: shall flat-field correction be applied ? (could contain polarization corrections)
+ * @param flat        Float pointer to global memory storing the flat image.
+ * @param outData     Float pointer to the output 1D array with the weighted histogram
+ * @param outCount    Float pointer to the output 1D array with the unweighted histogram
+ * @param outMerge    Float pointer to the output 1D array with the diffractogram
+
 */
 __kernel void
 lut_integrate(	const 	__global 	float 	*weights,
-				const 	__global 	uint 	bins,
-				const 	__global 	uint 	lut_size,
+									uint 	bins,
+									uint 	lut_size,
 				const 	__global 	uint 	*lut_idx,
 				const 	__global 	float 	*lut_coef,
-				const 				int   	do_dummy,
-				const 				float 	dummy,
-				const 				float 	delta_dummy,
-				const 				int 	do_dark, 
+									int   	do_dummy,
+									float 	dummy,
+									float 	delta_dummy,
+									int 	do_dark,
 				const 	__global 	float 	*dark,
-				const 		 		int		do_flat,
+									int		do_flat,
 				const 	__global 	float 	*flat,
-						__global 	double	*outData,
-						__global 	double	*outCount,
-						__global 	double	*outMerge
+						__global 	float	*outData,
+						__global 	float	*outCount,
+						__global 	float	*outMerge
 		        )
 {
-	
-	uint k, j, i= get_global_id(0);
-	int idx
-	double sum_data = 0.0;
-	double sum_count = 0.0;
-	const double epsilon = 1e-10
-	float coef, data
-	if(gid < bins)
+	uint idx, k, j, i= get_global_id(0);
+	bigfloat_t sum_data = 0.0;
+	bigfloat_t sum_count = 0.0;
+	const bigfloat_t epsilon = 1e-10;
+	float coef, data;
+	if(i < bins)
 	{
 		for (j=0;j<lut_size;j++)
 		{
@ -101,23 +106,23 @@ lut_integrate(	const 	__global 	float 	*weights,
 			coef = lut_coef[k];
 			if((idx <= 0) && (coef <= 0.0))
 			  break;
-			data = weight[idx];
+			data = weights[idx];
 			if( (!do_dummy) || (delta_dummy && (fabs(data-dummy) > delta_dummy))|| (data!=dummy) )
 			{
 				if(do_dark)
 					data -= dark[idx];
-				if do_flat:
+				if(do_flat)
 					data /= flat[idx];

 				sum_data +=  coef * data;
 				sum_count += coef;

-			}//test dummy
-		}//for j
-		outData[i] = sum_data;
-		outCount[i] = sum_count;
+			};//test dummy
+		};//for j
+		outData[i] = (float) sum_data;
+		outCount[i] = (float) sum_count;
 		if (sum_count > epsilon)
-		  outMerge[i] = sum_data / sum_count;
-  }//if bins
-}//end kernel
+		  outMerge[i] = (float) sum_data / sum_count;
+  };//if bins
+};//end kernel

--- a/src/test_splitBBox.py
+++ b/src/test_splitBBox.py
@ -1,10 +1,11 @@
 #!/usr/bin/python
-import os, time
+import os, time, numpy
 import pyFAI, fabio

 root = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "test", "testimages")
 spline = os.path.join(root, "halfccd.spline")
 poni = os.path.join(root, "LaB6.poni")
+bins = 2048
 res = []
 with open(poni, "r") as f:
    for l in f:
@ -18,14 +19,14 @@ edf = os.path.join(root, "LaB6_0020.edf")

 img = fabio.open(edf)
 ai = pyFAI.load(poni)
-ai.xrpd(img.data, 2048)
+ai.xrpd(img.data, bins)
 tth = ai._ttha.ravel().astype("float32")
 dtth = ai._dttha.ravel().astype("float32")
 data = img.data.ravel().astype("float32")

 import splitBBox
 t0 = time.time()
-ra, rb, rc, rd = splitBBox.histoBBox1d(data, tth, dtth, bins=2048)
+ra, rb, rc, rd = splitBBox.histoBBox1d(data, tth, dtth, bins=bins)
 t1 = time.time()
 ref_time = t1 - t0
 print("ref time: %.3fs" % ref_time)
@ -43,7 +44,7 @@ import splitBBoxLUT
 #a, b, c, d, ee = splitBBoxLUT.histoBBox1d(data, tth, dtth, bins=2048)
 #print "LUT max =", ee.max()
 t0 = time.time()
-integ = splitBBoxLUT.HistoBBox1d(tth, dtth, bins=2048)
+integ = splitBBoxLUT.HistoBBox1d(tth, dtth, bins=bins)
 t1 = time.time()
 a, b, c, d = integ.integrate(data)
 t2 = time.time()
@ -55,7 +56,53 @@ t2 = time.time()
 print "speed-up:", ref_time / (t2 - t1)
 from pylab import *
 #plot(ee)
-plot(a, b)
-plot(ra, rb)
+plot(a, b, label="LUT")
+plot(ra, rb, label="Original")
+
+import pyopencl
+
+mf = pyopencl.mem_flags
+ctx = pyopencl.create_some_context()
+q = pyopencl.CommandQueue(ctx)
+program = pyopencl.Program(ctx, open("../openCL/ocl_azim_LUT.cl").read()).build()
+t3 = time.time()
+weights_buf = pyopencl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=data)
+lut_idx_buf = pyopencl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=integ.lut_idx)
+lut_coef_buf = pyopencl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=integ.lut_coef)
+None_buf = pyopencl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=numpy.zeros(1, dtype=numpy.float32))
+outData_buf = pyopencl.Buffer(ctx, mf.WRITE_ONLY, 4 * bins)
+outCount_buf = pyopencl.Buffer(ctx, mf.WRITE_ONLY, 4 * bins)
+outMerge_buf = pyopencl.Buffer(ctx, mf.WRITE_ONLY, 4 * bins)
+print program.all_kernels()
+kernel = program.all_kernels()[0]
+
+program.lut_integrate(q, None, None,
+                       weights_buf,
+                       2048,
+                       integ.lut_size,
+                       lut_idx_buf,
+                       lut_coef_buf,
+                       0,
+                       0,
+                       0,
+                       0,
+                       None_buf,
+                       0,
+                       None_buf,
+                       outData_buf,
+                       outCount_buf,
+                       outMerge_buf)
+b = numpy.empty(bins, dtype=numpy.float32)
+c = numpy.empty(bins, dtype=numpy.float32)
+d = numpy.empty(bins, dtype=numpy.float32)
+pyopencl.enqueue_read_buffer(q, outData_buf, c).wait()
+pyopencl.enqueue_read_buffer(q, outCount_buf, d).wait()
+pyopencl.enqueue_read_buffer(q, outMerge_buf, b).wait()
+t4 = time.time()
+print "speed-up:", ref_time / (t4 - t3)
+from pylab import *
+#plot(ee)
+plot(a, b, label="OpenCL")
+
 show()
 raw_input("Enter")