diff --git a/openCL/ocl_azim_LUT.cl b/openCL/ocl_azim_LUT.cl index a3568dba..1f3f1a14 100644 --- a/openCL/ocl_azim_LUT.cl +++ b/openCL/ocl_azim_LUT.cl @@ -263,7 +263,7 @@ lut_integrate_image( __read_only image2d_t weights, coef = lut[k].coef; if((idx == 0) && (coef <= 0.0)) break; - data = read_imagef(weights, sampler, (int2)(idx/dimX , idx%dimX)).s0; + data = read_imagef(weights, sampler, (int2)(idx%dimY , idx/dimY)).s0; //data = weights[idx]; if( (!do_dummy) || (delta_dummy && (fabs(data-dummy) > delta_dummy))|| (data!=dummy) ) { diff --git a/src/test_splitBBox.py b/src/test_splitBBox.py index b622b938..e5b4effc 100755 --- a/src/test_splitBBox.py +++ b/src/test_splitBBox.py @@ -87,7 +87,7 @@ q = pyopencl.CommandQueue(ctx) program = pyopencl.Program(ctx, open("../openCL/ocl_azim_LUT.cl").read()).build() t3 = time.time() weights_buf = pyopencl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=data) -#weights_img = pyopencl.image_from_array(ctx, ary=img.data.astype(numpy.float32), mode="r", norm_int=False, num_channels=1) +weights_img = pyopencl.image_from_array(ctx, ary=img.data.astype(numpy.float32), mode="r", norm_int=False, num_channels=1) #print co.INTENSITY, ct.FLOAT, #imf = pyopencl.ImageFormat(numpy.uint32(co.INTENSITY), numpy.uint32(ct.FLOAT)) #weights_img = pyopencl.Image(ctx, flags=mf.READ_ONLY | mf.COPY_HOST_PTR, @@ -96,19 +96,57 @@ weights_buf = pyopencl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=data # pitches=(img.data.shape[-1],)) #image_from_array(ctx, ary=img.data.astype(numpy.float32), mode="r", norm_int=False, num_channels=1) -#lut_idx_buf = pyopencl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=integ.lut_idx.astype(numpy.uint32)) -#lut_coef_buf = pyopencl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=integ.lut_coef) +lut_idx_buf = pyopencl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=integ.lut_idx.astype(numpy.uint32)) +lut_coef_buf = pyopencl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=integ.lut_coef) lut_buf = pyopencl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=integ.lut) None_buf = pyopencl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=numpy.zeros(1, dtype=numpy.float32)) outData_buf = pyopencl.Buffer(ctx, mf.WRITE_ONLY, numpy.dtype(numpy.float32).itemsize * bins) outCount_buf = pyopencl.Buffer(ctx, mf.WRITE_ONLY, numpy.dtype(numpy.float32).itemsize * bins) outMerge_buf = pyopencl.Buffer(ctx, mf.WRITE_ONLY, numpy.dtype(numpy.float32).itemsize * bins) -args = (#weights_img, numpy.uint32(img.dim1), numpy.uint32(img.dim0), +args_orig = (#weights_img, numpy.uint32(img.dim1), numpy.uint32(img.dim0), weights_buf, numpy.uint32(2048), numpy.uint32(integ.lut_size), -# lut_idx_buf, -# lut_coef_buf, + lut_idx_buf, + lut_coef_buf, +# lut_buf, + numpy.int32(0), + numpy.float32(0), + numpy.float32(0), + numpy.int32(0), + None_buf, + numpy.int32(0), + None_buf, + outData_buf, + outCount_buf, + outMerge_buf) +t4 = time.time() +program.lut_integrate(q, (bins,), (16,), *args_orig) +b = numpy.empty(bins, dtype=numpy.float32) +c = numpy.empty(bins, dtype=numpy.float32) +d = numpy.empty(bins, dtype=numpy.float32) +pyopencl.enqueue_copy(q, c, outData_buf) +pyopencl.enqueue_copy(q, d, outCount_buf) +pyopencl.enqueue_copy(q, b, outMerge_buf).wait() +t5 = time.time() +pylab.plot(a, b, label="OpenCL_orig") + +print "OpenCL speed-up: %s setup: %.2fms \texec: %.2fms" % (0.001 * ref_time / (t5 - t3), 1000 * (t4 - t3), 1000 * (t5 - t4)) +print abs(ra - a).max(), abs(rb - b).max(), abs(rc - c).max(), abs(rd - d).max() +for i in range(10): + j = 2 ** i + st = time.time() + program.lut_integrate(q, (bins,), (j,), * args_orig) + pyopencl.enqueue_copy(q, b, outMerge_buf).wait() + print("Size: %s \ttime: %.2fms" % (j, 1000 * (time.time() - st))) + + +args_single = (#weights_img, numpy.uint32(img.dim1), numpy.uint32(img.dim0), + weights_buf, + numpy.uint32(2048), + numpy.uint32(integ.lut_size), + #lut_idx_buf, + #lut_coef_buf, lut_buf, numpy.int32(0), numpy.float32(0), @@ -121,8 +159,7 @@ args = (#weights_img, numpy.uint32(img.dim1), numpy.uint32(img.dim0), outCount_buf, outMerge_buf) t4 = time.time() -print len(args) -program.lut_integrate_single(q, (bins,), (16,), *args) +program.lut_integrate_single(q, (bins,), (16,), *args_single) b = numpy.empty(bins, dtype=numpy.float32) c = numpy.empty(bins, dtype=numpy.float32) d = numpy.empty(bins, dtype=numpy.float32) @@ -130,17 +167,56 @@ pyopencl.enqueue_copy(q, c, outData_buf) pyopencl.enqueue_copy(q, d, outCount_buf) pyopencl.enqueue_copy(q, b, outMerge_buf).wait() t5 = time.time() -pylab.plot(a, b, label="OpenCL") +pylab.plot(a, b, label="OpenCL_single") print "OpenCL speed-up: %s setup: %.2fms \texec: %.2fms" % (0.001 * ref_time / (t5 - t3), 1000 * (t4 - t3), 1000 * (t5 - t4)) print abs(ra - a).max(), abs(rb - b).max(), abs(rc - c).max(), abs(rd - d).max() for i in range(10): j = 2 ** i st = time.time() - program.lut_integrate_single(q, (bins,), (j,), * args) + program.lut_integrate_single(q, (bins,), (j,), * args_single) pyopencl.enqueue_copy(q, b, outMerge_buf).wait() print("Size: %s \ttime: %.2fms" % (j, 1000 * (time.time() - st))) + +args_image = (weights_img, numpy.uint32(img.dim2), numpy.uint32(img.dim1), +# weights_buf, + numpy.uint32(2048), + numpy.uint32(integ.lut_size), + #lut_idx_buf, + #lut_coef_buf, + lut_buf, + numpy.int32(0), + numpy.float32(0), + numpy.float32(0), + numpy.int32(0), + None_buf, + numpy.int32(0), + None_buf, + outData_buf, + outCount_buf, + outMerge_buf) +t4 = time.time() +program.lut_integrate_image(q, (bins,), (16,), *args_image) +b = numpy.empty(bins, dtype=numpy.float32) +c = numpy.empty(bins, dtype=numpy.float32) +d = numpy.empty(bins, dtype=numpy.float32) +pyopencl.enqueue_copy(q, c, outData_buf) +pyopencl.enqueue_copy(q, d, outCount_buf) +pyopencl.enqueue_copy(q, b, outMerge_buf).wait() +t5 = time.time() +pylab.plot(a, b, label="OpenCL_image") + +print "OpenCL speed-up: %s setup: %.2fms \texec: %.2fms" % (0.001 * ref_time / (t5 - t3), 1000 * (t4 - t3), 1000 * (t5 - t4)) +print abs(ra - a).max(), abs(rb - b).max(), abs(rc - c).max(), abs(rd - d).max() +for i in range(10): + j = 2 ** i + st = time.time() + program.lut_integrate_image(q, (bins,), (j,), * args_image) + pyopencl.enqueue_copy(q, b, outMerge_buf).wait() + print("Size: %s \ttime: %.2fms" % (j, 1000 * (time.time() - st))) + + #plot(ee) #pylab.plot(a, b, label="OpenCL") pylab.legend()