#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Project: Fast Azimuthal integration
# https://github.com/silx-kit/pyFAI
#
# Copyright (C) 2018 European Synchrotron Radiation Facility, Grenoble, France
#
# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
__author__ = "Jerome Kieffer"
__license__ = "GPLv3+"
__date__ = "28/01/2016"
__copyright__ = "2011-2014, ESRF"
__contact__ = "jerome.kieffer@esrf.fr"
import cython
cimport numpy
import numpy
from cython.parallel import prange
from libc.math cimport floor, ceil, fabs
import logging
import threading
import types
import os
import sys
import time
logger = logging.getLogger("pyFAI._distortionCSR")
from ..detectors import detector_factory
from ..decorators import timeit
import fabio
from . import ocl_azim_csr_dis

cdef struct lut_point:
    numpy.int32_t idx
    numpy.float32_t coef

cdef bint NEED_DECREF = sys.version_info < (2, 7) and numpy.version.version < "1.5"


cpdef inline float calc_area(float I1, float I2, float slope, float intercept) nogil:
    "Calculate the area between I1 and I2 of a line with a given slope & intercept"
    return 0.5 * (I2 - I1) * (slope * (I2 + I1) + 2 * intercept)
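
# The closed form above is the exact integral of the line y = slope * x + intercept
# between the abscissae I1 and I2:
#
#     integral from I1 to I2 of (slope * x + intercept) dx
#         = 0.5 * slope * (I2**2 - I1**2) + intercept * (I2 - I1)
#         = 0.5 * (I2 - I1) * (slope * (I2 + I1) + 2 * intercept)
#
# i.e. the signed area of the trapezoid bounded by the line, the first axis and
# the two verticals at I1 and I2; the sign follows the direction of travel
# (negative when I2 < I1).
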
@cython.cdivision(True)
@cython.boundscheck(False)
cdef inline void integrate(float[:, :] box, float start, float stop, float slope, float intercept) nogil:
    "Integrate in a box a line between start and stop, line defined by its slope & intercept"
    cdef int i, h = 0
    cdef float P, dP, A, AA, dA, sign
    if start < stop:  # positive contribution
        P = ceil(start)
        dP = P - start
        if P > stop:  # start and stop are in the same unit
            A = calc_area(start, stop, slope, intercept)
            if A != 0:
                AA = fabs(A)
                sign = A / AA
                dA = (stop - start)  # always positive
                h = 0
                while AA > 0:
                    if dA > AA:
                        dA = AA
                        AA = -1
                    box[(<int> floor(start)), h] += sign * dA
                    AA -= dA
                    h += 1
        else:
            if dP > 0:
                A = calc_area(start, P, slope, intercept)
                if A != 0:
                    AA = fabs(A)
                    sign = A / AA
                    h = 0
                    dA = dP
                    while AA > 0:
                        if dA > AA:
                            dA = AA
                            AA = -1
                        box[(<int> floor(P)) - 1, h] += sign * dA
                        AA -= dA
                        h += 1
            # subsection P1->Pn
            for i in range((<int> floor(P)), (<int> floor(stop))):
                A = calc_area(i, i + 1, slope, intercept)
                if A != 0:
                    AA = fabs(A)
                    sign = A / AA
                    h = 0
                    dA = 1.0
                    while AA > 0:
                        if dA > AA:
                            dA = AA
                            AA = -1
                        box[i, h] += sign * dA
                        AA -= dA
                        h += 1
            # Section Pn->B
            P = floor(stop)
            dP = stop - P
            if dP > 0:
                A = calc_area(P, stop, slope, intercept)
                if A != 0:
                    AA = fabs(A)
                    sign = A / AA
                    h = 0
                    dA = fabs(dP)
                    while AA > 0:
                        if dA > AA:
                            dA = AA
                            AA = -1
                        box[(<int> floor(P)), h] += sign * dA
                        AA -= dA
                        h += 1
    elif start > stop:  # negative contribution; note: if start == stop, there is no contribution
        P = floor(start)
        if stop > P:  # start and stop are in the same unit
            A = calc_area(start, stop, slope, intercept)
            if A != 0:
                AA = fabs(A)
                sign = A / AA
                dA = (start - stop)  # always positive
                h = 0
                while AA > 0:
                    if dA > AA:
                        dA = AA
                        AA = -1
                    box[(<int> floor(start)), h] += sign * dA
                    AA -= dA
                    h += 1
        else:
            dP = P - start
            if dP < 0:
                A = calc_area(start, P, slope, intercept)
                if A != 0:
                    AA = fabs(A)
                    sign = A / AA
                    h = 0
                    dA = fabs(dP)
                    while AA > 0:
                        if dA > AA:
                            dA = AA
                            AA = -1
                        box[(<int> floor(P)), h] += sign * dA
                        AA -= dA
                        h += 1
            # subsection P1->Pn
            for i in range((<int> start), (<int> ceil(stop)), -1):
                A = calc_area(i, i - 1, slope, intercept)
                if A != 0:
                    AA = fabs(A)
                    sign = A / AA
                    h = 0
                    dA = 1
                    while AA > 0:
                        if dA > AA:
                            dA = AA
                            AA = -1
                        box[i - 1, h] += sign * dA
                        AA -= dA
                        h += 1
            # Section Pn->B
            P = ceil(stop)
            dP = stop - P
            if dP < 0:
                A = calc_area(P, stop, slope, intercept)
                if A != 0:
                    AA = fabs(A)
                    sign = A / AA
                    h = 0
                    dA = fabs(dP)
                    while AA > 0:
                        if dA > AA:
                            dA = AA
                            AA = -1
                        box[(<int> floor(stop)), h] += sign * dA
                        AA -= dA
                        h += 1
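
# Summary of intent (the loops above are authoritative): integrate() deposits
# the signed area between an edge segment and the first axis into `box`,
# column by column along axis 0. Within a column, |area| is stacked bottom-up
# along axis 1 (index h) in slabs of at most dA, the horizontal width of the
# sub-segment, so no cell receives more than the width it actually spans.
# calc_LUT() below walks the four edges of a pixel quadrilateral in order
# (B->A, A->D, D->C, C->B), so the signed contributions cancel outside the
# polygon and add up, cell by cell, to the area of intersection between the
# distorted pixel and each target pixel.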


class Distortion(object):
    """
    This class applies a distortion correction on an image.

    It is also able to apply an inversion of the correction.
    """

    def __init__(self, detector="detector", shape=None, compute_device="Host", workgroup_size=32):
        """
        :param detector: detector instance or detector name
        :param shape: shape of the detector image; only used when the detector does not expose a max_shape
        :param compute_device: "Host" or "Device", selects where the correction is computed
        :param workgroup_size: OpenCL workgroup (block) size used on the device
        """
        if type(detector) in types.StringTypes:
            self.detector = detector_factory(detector)
        else:  # we assume it is a Detector instance
            self.detector = detector
        if "max_shape" in dir(self.detector):
            self.shape = self.detector.max_shape
        else:
            self.shape = shape
        self.shape = tuple([int(i) for i in self.shape])
        self.bins = self.shape[0] * self.shape[1]
        self._sem = threading.Semaphore()
        self.bin_size = None
        self.lut_size = None
        self.pos = None
        self.LUT = None
        self.delta0 = self.delta1 = None  # max size of a pixel on the regular grid
        self.integrator = None
        self.compute_device = compute_device
        self.workgroup_size = workgroup_size
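
    # Minimal usage sketch (illustrative only; assumes a detector known to the
    # pyFAI detector factory, e.g. "frelon", with a spline describing its
    # distortion, and a 2D numpy array `raw_image` of matching shape):
    #
    #     dis = Distortion("frelon", compute_device="Host")
    #     dis.calc_LUT()  # optional, done lazily by correct()
    #     corrected = dis.correct(raw_image)
    #     raw_again, lost = dis.uncorrect(corrected)
    #
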
    def __repr__(self):
        return os.linesep.join(["Distortion correction for detector:",
                                self.detector.__repr__()])

    def calc_pos(self):
        if self.pos is None:
            with self._sem:
                if self.pos is None:
                    pos_corners = numpy.empty((self.shape[0] + 1, self.shape[1] + 1, 2), dtype=numpy.float64)
                    d1 = numpy.outer(numpy.arange(self.shape[0] + 1, dtype=numpy.float64), numpy.ones(self.shape[1] + 1, dtype=numpy.float64)) - 0.5
                    d2 = numpy.outer(numpy.ones(self.shape[0] + 1, dtype=numpy.float64), numpy.arange(self.shape[1] + 1, dtype=numpy.float64)) - 0.5
                    pos_corners[:, :, 0], pos_corners[:, :, 1] = self.detector.calc_cartesian_positions(d1, d2)[:2]
                    pos_corners[:, :, 0] /= self.detector.pixel1
                    pos_corners[:, :, 1] /= self.detector.pixel2
                    pos = numpy.empty((self.shape[0], self.shape[1], 4, 2), dtype=numpy.float32)
                    pos[:, :, 0, :] = pos_corners[:-1, :-1]
                    pos[:, :, 1, :] = pos_corners[:-1, 1:]
                    pos[:, :, 2, :] = pos_corners[1:, 1:]
                    pos[:, :, 3, :] = pos_corners[1:, :-1]
                    self.pos = pos
                    self.delta0 = int((numpy.ceil(pos_corners[1:, :, 0]) - numpy.floor(pos_corners[:-1, :, 0])).max())
                    self.delta1 = int((numpy.ceil(pos_corners[:, 1:, 1]) - numpy.floor(pos_corners[:, :-1, 1])).max())
        return self.pos
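
    # Layout of self.pos: pos[i, j, k, :] holds the (axis-0, axis-1) position,
    # in pixel units, of the k-th corner of pixel (i, j), ordered
    # (i, j), (i, j+1), (i+1, j+1), (i+1, j) -- the corners A, B, C, D used in
    # calc_LUT(). delta0/delta1 are the largest bounding-box extents of any
    # distorted pixel along each axis, i.e. the size of the scratch buffer
    # needed in calc_LUT().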

    @cython.wraparound(False)
    @cython.boundscheck(False)
    def calc_LUT_size(self):
        """
        Considering the "half-CCD" spline from ID11 which describes a (1025, 2048) detector,
        the physical location of pixels should go from:
        [-17.48634 : 1027.0543, -22.768829 : 2028.3689]
        We chose to discard pixels falling outside the [0:1025, 0:2048] range, with a loss of intensity.
        We keep self.pos: pos_corners will not be compatible with systems showing non-adjacent pixels (like some XPAD detectors).
        """
        cdef int i, j, k, l, shape0, shape1
        cdef numpy.ndarray[numpy.float32_t, ndim=4] pos
        cdef int[:, :] pos0min, pos1min, pos0max, pos1max
        cdef numpy.ndarray[numpy.int32_t, ndim=2] lut_size
        if self.pos is None:
            pos = self.calc_pos()
        else:
            pos = self.pos
        if self.lut_size is None:
            with self._sem:
                if self.lut_size is None:
                    shape0, shape1 = self.shape
                    pos0min = numpy.floor(pos[:, :, :, 0].min(axis=-1)).astype(numpy.int32).clip(0, self.shape[0])
                    pos1min = numpy.floor(pos[:, :, :, 1].min(axis=-1)).astype(numpy.int32).clip(0, self.shape[1])
                    pos0max = (numpy.ceil(pos[:, :, :, 0].max(axis=-1)).astype(numpy.int32) + 1).clip(0, self.shape[0])
                    pos1max = (numpy.ceil(pos[:, :, :, 1].max(axis=-1)).astype(numpy.int32) + 1).clip(0, self.shape[1])
                    lut_size = numpy.zeros(self.shape, dtype=numpy.int32)
                    with nogil:
                        for i in range(shape0):
                            for j in range(shape1):
                                for k in range(pos0min[i, j], pos0max[i, j]):
                                    for l in range(pos1min[i, j], pos1max[i, j]):
                                        lut_size[k, l] += 1
                    self.bin_size = lut_size.ravel()
                    self.lut_size = self.bin_size.sum()
        return lut_size
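
    # bin_size[b] counts, for target pixel b, the source pixels whose bounding
    # box overlaps it (an upper bound on the true number of contributors).
    # In calc_LUT() the cumulative sum of bin_size becomes the CSR row pointer
    # `indptr`, so row b owns the slots indptr[b]:indptr[b+1] of `indices` and
    # `data`. For example, bin_size = [2, 0, 3] gives indptr = [0, 2, 2, 5].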

    @cython.wraparound(False)
    @cython.boundscheck(False)
    @cython.cdivision(True)
    def calc_LUT(self):
        cdef:
            int i, j, ms, ml, ns, nl, shape0, shape1, delta0, delta1, buffer_size, i0, i1, size
            int offset0, offset1, box_size0, box_size1, bins, tmp_index
            numpy.int32_t k, idx = 0
            float A0, A1, B0, B1, C0, C1, D0, D1, pAB, pBC, pCD, pDA, cAB, cBC, cCD, cDA, area, value
            float[:, :, :, :] pos
            numpy.ndarray[numpy.int32_t, ndim=2] outMax = numpy.zeros(self.shape, dtype=numpy.int32)
            float[:, :] buffer
            numpy.ndarray[numpy.int32_t, ndim=1] indptr
            numpy.ndarray[numpy.int32_t, ndim=1] indices
            numpy.ndarray[numpy.float32_t, ndim=1] data
            numpy.ndarray[numpy.int32_t, ndim=1] bin_size
        shape0, shape1 = self.shape
        bin_size = self.bin_size
        if self.lut_size is None:
            self.calc_LUT_size()
        if self.LUT is None:
            with self._sem:
                if self.LUT is None:
                    pos = self.pos
                    indices = numpy.zeros(shape=self.lut_size, dtype=numpy.int32)
                    data = numpy.zeros(shape=self.lut_size, dtype=numpy.float32)
                    bins = shape0 * shape1
                    indptr = numpy.zeros(bins + 1, dtype=numpy.int32)
                    indptr[1:] = bin_size.cumsum(dtype=numpy.int32)
                    indices_size = self.lut_size * sizeof(numpy.int32)
                    data_size = self.lut_size * sizeof(numpy.float32)
                    indptr_size = bins * sizeof(numpy.int32)
                    logger.info("CSR matrix: %.3f MByte" % ((indices_size + data_size + indptr_size) / 1.0e6))
                    buffer = numpy.empty((self.delta0, self.delta1), dtype=numpy.float32)
                    buffer_size = self.delta0 * self.delta1 * sizeof(float)
                    logger.info("Max pixel size: %ix%i; Max source pixel in target: %i" % (buffer.shape[1], buffer.shape[0], self.lut_size))
                    with nogil:
                        # i, j, idx are indices of the raw (uncorrected) image
                        for i in range(shape0):
                            for j in range(shape1):
                                # reinit of buffer
                                buffer[:, :] = 0
                                A0 = pos[i, j, 0, 0]
                                A1 = pos[i, j, 0, 1]
                                B0 = pos[i, j, 1, 0]
                                B1 = pos[i, j, 1, 1]
                                C0 = pos[i, j, 2, 0]
                                C1 = pos[i, j, 2, 1]
                                D0 = pos[i, j, 3, 0]
                                D1 = pos[i, j, 3, 1]
                                offset0 = (<int> floor(min(A0, B0, C0, D0)))
                                offset1 = (<int> floor(min(A1, B1, C1, D1)))
                                box_size0 = (<int> ceil(max(A0, B0, C0, D0))) - offset0
                                box_size1 = (<int> ceil(max(A1, B1, C1, D1))) - offset1
                                A0 -= <float> offset0
                                A1 -= <float> offset1
                                B0 -= <float> offset0
                                B1 -= <float> offset1
                                C0 -= <float> offset0
                                C1 -= <float> offset1
                                D0 -= <float> offset0
                                D1 -= <float> offset1
                                if B0 != A0:
                                    pAB = (B1 - A1) / (B0 - A0)
                                    cAB = A1 - pAB * A0
                                else:
                                    pAB = cAB = 0.0
                                if C0 != B0:
                                    pBC = (C1 - B1) / (C0 - B0)
                                    cBC = B1 - pBC * B0
                                else:
                                    pBC = cBC = 0.0
                                if D0 != C0:
                                    pCD = (D1 - C1) / (D0 - C0)
                                    cCD = C1 - pCD * C0
                                else:
                                    pCD = cCD = 0.0
                                if A0 != D0:
                                    pDA = (A1 - D1) / (A0 - D0)
                                    cDA = D1 - pDA * D0
                                else:
                                    pDA = cDA = 0.0
                                integrate(buffer, B0, A0, pAB, cAB)
                                integrate(buffer, A0, D0, pDA, cDA)
                                integrate(buffer, D0, C0, pCD, cCD)
                                integrate(buffer, C0, B0, pBC, cBC)
                                area = 0.5 * ((C0 - A0) * (D1 - B1) - (C1 - A1) * (D0 - B0))
                                for ms in range(box_size0):
                                    ml = ms + offset0
                                    if ml < 0 or ml >= shape0:
                                        continue
                                    for ns in range(box_size1):
                                        # ms, ns are indexes of the corrected image in short form, ml & nl are the same
                                        nl = ns + offset1
                                        if nl < 0 or nl >= shape1:
                                            continue
                                        value = buffer[ms, ns] / area
                                        if value <= 0:
                                            continue
                                        k = outMax[ml, nl]
                                        tmp_index = indptr[ml * shape1 + nl]
                                        indices[tmp_index + k] = idx
                                        data[tmp_index + k] = value
                                        outMax[ml, nl] = k + 1
                                idx += 1
                    # for i in range(bins):
                    #     tmp_index = indptr[i]
                    #     index_tmp_index = indices[tmp_index + bin_size[i] - 1]
                    #     for j in range(tmp_index + bin_size[i], tmp_index + bin_size_padded[i]):
                    #         indices[j] = index_tmp_index
                    self.LUT = (data, indices, indptr)
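
    # The resulting self.LUT = (data, indices, indptr) is a standard CSR
    # (compressed sparse row) triple describing a (bins x bins) sparse matrix:
    # for corrected pixel b, indices[indptr[b]:indptr[b+1]] lists the raw-image
    # pixels overlapping it and data[indptr[b]:indptr[b+1]] the corresponding
    # fractional weights (overlap area divided by the pixel area).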

    @cython.wraparound(False)
    @cython.boundscheck(False)
    def correctHost(self, image):
        """
        Correct an image based on the look-up table calculated ...
        Calculation takes place on the Host

        :param image: 2D-array with the image
        :return: corrected 2D image
        """
        cdef int i, j, idx, size, bins
        cdef float coef, tmp
        cdef float[:] lout, lin, data
        cdef int[:] indices, indptr
        if self.LUT is None:
            self.calc_LUT()
        data = self.LUT[0]
        indices = self.LUT[1]
        indptr = self.LUT[2]
        bins = self.bins
        img_shape = image.shape
        if (img_shape[0] < self.shape[0]) or (img_shape[1] < self.shape[1]):
            new_image = numpy.zeros(self.shape, dtype=numpy.float32)
            new_image[:img_shape[0], :img_shape[1]] = image
            image = new_image
            logger.warning("Patching image as image is %ix%i and spline is %ix%i" % (img_shape[1], img_shape[0], self.shape[1], self.shape[0]))
        out = numpy.zeros(self.shape, dtype=numpy.float32)
        lout = out.ravel()
        lin = numpy.ascontiguousarray(image.ravel(), dtype=numpy.float32)
        size = lin.size
        for i in prange(bins, nogil=True, schedule="static"):
            for j in range(indptr[i], indptr[i + 1]):
                idx = indices[j]
                coef = data[j]
                if coef <= 0:
                    continue
                if idx >= size:
                    with gil:
                        logger.warning("Accessing %i >= %i !!!" % (idx, size))
                    continue
                tmp = lout[i] + lin[idx] * coef
                lout[i] = tmp
        return out[:img_shape[0], :img_shape[1]]
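
    # Aside (not used by this module): the loop above is just a sparse
    # matrix-vector product with the CSR triple, so the same correction could
    # be cross-checked with scipy (assuming scipy is installed):
    #
    #     import scipy.sparse
    #     csr = scipy.sparse.csr_matrix(self.LUT, shape=(self.bins, self.bins))
    #     out = csr.dot(image.astype("float32").ravel()).reshape(self.shape)
    #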

    @cython.wraparound(False)
    @cython.boundscheck(False)
    def correctDevice(self, image):
        """
        Correct an image based on the look-up table calculated ...
        Calculation takes place on the device

        :param image: 2D-array with the image
        :return: corrected 2D image
        """
        if self.integrator is None:
            if self.LUT is None:
                self.calc_LUT()
            self.integrator = ocl_azim_csr_dis.OCL_CSR_Integrator(self.LUT, self.bins, "GPU", block_size=self.workgroup_size)
        img_shape = image.shape
        if (img_shape[0] < self.shape[0]) or (img_shape[1] < self.shape[1]):
            new_image = numpy.zeros(self.shape, dtype=numpy.float32)
            new_image[:img_shape[0], :img_shape[1]] = image
            image = new_image
            logger.warning("Patching image as image is %ix%i and spline is %ix%i" % (img_shape[1], img_shape[0], self.shape[1], self.shape[0]))
        out = self.integrator.integrate(image)
        out[1].shape = self.shape
        return out[1][:img_shape[0], :img_shape[1]]
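
    # `out` is the tuple returned by OCL_CSR_Integrator.integrate(); element 1
    # appears to hold the corrected data as a flat array, which is reshaped to
    # the detector shape and then cropped back to the input image size,
    # mirroring the host-side path above.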

    @cython.wraparound(False)
    @cython.boundscheck(False)
    def correct(self, image):
        out = None
        if self.compute_device == "Host":
            out = self.correctHost(image)
        elif self.compute_device == "Device":
            out = self.correctDevice(image)
        else:
            logger.warning("Please select a compute device (Host or Device)")
        return out

    def setHost(self):
        self.compute_device = "Host"

    def setDevice(self):
        self.compute_device = "Device"

    @timeit
    def uncorrect(self, image):
        """
        Take an image which has been corrected and transform it back into its raw form (with loss of information).

        :param image: 2D-array with the image
        :return: uncorrected 2D image and a mask (1 where the intensity could not be redistributed, i.e. empty bins of the LUT)
        """
        # numpy arrays (not memoryviews) are used here because the slices below
        # are reduced and used for fancy indexing
        cdef numpy.ndarray[numpy.int32_t, ndim=1] indices, indptr
        cdef numpy.ndarray[numpy.float32_t, ndim=1] data
        cdef int idx, bins
        if self.LUT is None:
            with self._sem:
                if self.LUT is None:
                    self.calc_LUT()
        out = numpy.zeros(self.shape, dtype=numpy.float32)
        mask = numpy.zeros(self.shape, dtype=numpy.int8)
        lmask = mask.ravel()
        lout = out.ravel()
        lin = image.ravel()
        data = self.LUT[0]
        indices = self.LUT[1]
        indptr = self.LUT[2]
        bins = self.bins
        for idx in range(bins):
            idx1 = indptr[idx]
            idx2 = indptr[idx + 1]
            if idx1 == idx2:
                lmask[idx] = 1
                continue
            val = lin[idx] / data[idx1:idx2].sum()
            lout[indices[idx1:idx2]] += val * data[idx1:idx2]
        return out, mask
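
    # uncorrect() distributes the intensity of each corrected pixel back onto
    # its contributing raw pixels, proportionally to the CSR weights of that
    # row renormalised to sum to 1; rows with no entry are flagged in the mask
    # since their intensity has nowhere to go.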