# -*- coding: utf-8 -*-
"""Interpolation functions.
"""
import numpy as np
import numba as nb
from sigpy import backend, config, util
__all__ = ['interpolate', 'gridding']
KERNELS = ['spline', 'kaiser_bessel']
[docs]def interpolate(input, coord, kernel='spline', width=2, param=1):
r"""Interpolation from array to points specified by coordinates.
Let :math:`x` be the input, :math:`y` be the output,
:math:`c` be the coordinates, :math:`W` be the kernel width,
and :math:`K` be the interpolation kernel, then the function computes,
.. math ::
y[j] = \sum_{i : \| i - c[j] \|_\infty \leq W / 2}
K\left(\frac{i - c[j]}{W / 2}\right) x[i]
There are two types of kernels: 'spline' and 'kaiser_bessel'.
'spline' uses the cardinal B-spline functions as kernels.
The order of the spline can be specified using param.
For example, param=1 performs linear interpolation.
Concretely, for param=0, :math:`K(x) = 1`,
for param=1, :math:`K(x) = 1 - |x|`, and
for param=2, :math:`K(x) = \frac{9}{8} (1 - |x|)^2`
for :math:`|x| > \frac{1}{3}`
and :math:`K(x) = \frac{3}{4} (1 - 3 x^2)` for :math:`|x| < \frac{1}{3}`.
These function expressions are derived from the reference wikipedia
page by shifting and scaling the range to -1 to 1.
When the coordinates specifies a uniformly spaced grid,
it is recommended to use the original scaling with width=param + 1
so that the interpolation weights add up to one.
'kaiser_bessel' uses the Kaiser-Bessel function as kernel.
Concretely, :math:`K(x) = I_0(\beta \sqrt{1 - x^2})`,
where :math:`I_0` is the modified Bessel function of the first kind.
The beta parameter can be specified with param.
The modified Bessel function of the first kind is approximated
using the power series, following the reference.
Args:
input (array): Input array of shape.
coord (array): Coordinate array of shape [..., ndim]
width (float or tuple of floats): Interpolation kernel full-width.
kernel (str): Interpolation kernel, {'spline', 'kaiser_bessel'}.
param (float or tuple of floats): Kernel parameter.
Returns:
output (array): Output array.
References:
https://en.wikipedia.org/wiki/Spline_wavelet#Cardinal_B-splines_of_small_orders
http://people.math.sfu.ca/~cbm/aands/page_378.htm
"""
ndim = coord.shape[-1]
batch_shape = input.shape[:-ndim]
batch_size = util.prod(batch_shape)
pts_shape = coord.shape[:-1]
npts = util.prod(pts_shape)
xp = backend.get_array_module(input)
input = input.reshape([batch_size] + list(input.shape[-ndim:]))
coord = coord.reshape([npts, ndim])
output = xp.zeros([batch_size, npts], dtype=input.dtype)
if np.isscalar(param):
param = xp.array([param] * ndim, coord.dtype)
else:
param = xp.array(param, coord.dtype)
if np.isscalar(width):
width = xp.array([width] * ndim, coord.dtype)
else:
width = xp.array(width, coord.dtype)
if xp == np:
_interpolate[kernel][ndim - 1](output, input, coord, width, param)
else: # pragma: no cover
_interpolate_cuda[kernel][ndim - 1](
input, coord, width, param, output, size=npts)
return output.reshape(batch_shape + pts_shape)
[docs]def gridding(input, coord, shape, kernel="spline", width=2, param=1):
r"""Gridding of points specified by coordinates to array.
Let :math:`y` be the input, :math:`x` be the output,
:math:`c` be the coordinates, :math:`W` be the kernel width,
and :math:`K` be the interpolation kernel, then the function computes,
.. math ::
x[i] = \sum_{j : \| i - c[j] \|_\infty \leq W / 2}
K\left(\frac{i - c[j]}{W / 2}\right) y[j]
There are two types of kernels: 'spline' and 'kaiser_bessel'.
'spline' uses the cardinal B-spline functions as kernels.
The order of the spline can be specified using param.
For example, param=1 performs linear interpolation.
Concretely, for param=0, :math:`K(x) = 1`,
for param=1, :math:`K(x) = 1 - |x|`, and
for param=2, :math:`K(x) = \frac{9}{8} (1 - |x|)^2`
for :math:`|x| > \frac{1}{3}`
and :math:`K(x) = \frac{3}{4} (1 - 3 x^2)` for :math:`|x| < \frac{1}{3}`.
These function expressions are derived from the reference wikipedia
page by shifting and scaling the range to -1 to 1.
When the coordinates specifies a uniformly spaced grid,
it is recommended to use the original scaling with width=param + 1
so that the interpolation weights add up to one.
'kaiser_bessel' uses the Kaiser-Bessel function as kernel.
Concretely, :math:`K(x) = I_0(\beta \sqrt{1 - x^2})`,
where :math:`I_0` is the modified Bessel function of the first kind.
The beta parameter can be specified with param.
The modified Bessel function of the first kind is approximated
using the power series, following the reference.
Args:
input (array): Input array.
coord (array): Coordinate array of shape [..., ndim]
width (float or tuple of floats): Interpolation kernel full-width.
kernel (str): Interpolation kernel, {"spline", "kaiser_bessel"}.
param (float or tuple of floats): Kernel parameter.
Returns:
output (array): Output array.
References:
https://en.wikipedia.org/wiki/Spline_wavelet#Cardinal_B-splines_of_small_orders
http://people.math.sfu.ca/~cbm/aands/page_378.htm
"""
ndim = coord.shape[-1]
batch_shape = shape[:-ndim]
batch_size = util.prod(batch_shape)
pts_shape = coord.shape[:-1]
npts = util.prod(pts_shape)
xp = backend.get_array_module(input)
isreal = np.issubdtype(input.dtype, np.floating)
input = input.reshape([batch_size, npts])
coord = coord.reshape([npts, ndim])
output = xp.zeros([batch_size] + list(shape[-ndim:]), dtype=input.dtype)
if np.isscalar(param):
param = xp.array([param] * ndim, coord.dtype)
else:
param = xp.array(param, coord.dtype)
if np.isscalar(width):
width = xp.array([width] * ndim, coord.dtype)
else:
width = xp.array(width, coord.dtype)
if xp == np:
_gridding[kernel][ndim - 1](output, input, coord, width, param)
else: # pragma: no cover
if isreal:
_gridding_cuda[kernel][ndim - 1](
input, coord, width, param, output, size=npts)
else:
_gridding_cuda_complex[kernel][ndim - 1](
input, coord, width, param, output, size=npts)
return output.reshape(shape)
@nb.jit(nopython=True, cache=True) # pragma: no cover
def _spline_kernel(x, order):
if abs(x) > 1:
return 0
if order == 0:
return 1
elif order == 1:
return 1 - abs(x)
elif order == 2:
if abs(x) > 1 / 3:
return 9 / 8 * (1 - abs(x))**2
else:
return 3 / 4 * (1 - 3 * x**2)
@nb.jit(nopython=True, cache=True) # pragma: no cover
def _kaiser_bessel_kernel(x, beta):
if abs(x) > 1:
return 0
x = beta * (1 - x**2)**0.5
t = x / 3.75
if x < 3.75:
return 1 + 3.5156229 * t**2 + 3.0899424 * t**4 +\
1.2067492 * t**6 + 0.2659732 * t**8 +\
0.0360768 * t**10 + 0.0045813 * t**12
else:
return x**-0.5 * np.exp(x) * (
0.39894228 + 0.01328592 * t**-1 +
0.00225319 * t**-2 - 0.00157565 * t**-3 +
0.00916281 * t**-4 - 0.02057706 * t**-5 +
0.02635537 * t**-6 - 0.01647633 * t**-7 +
0.00392377 * t**-8)
def _get_interpolate(kernel):
if kernel == 'spline':
kernel = _spline_kernel
elif kernel == 'kaiser_bessel':
kernel = _kaiser_bessel_kernel
@nb.jit(nopython=True) # pragma: no cover
def _interpolate1(output, input, coord, width, param):
batch_size, nx = input.shape
npts = coord.shape[0]
for i in range(npts):
kx = coord[i, -1]
x0 = np.ceil(kx - width[-1] / 2)
x1 = np.floor(kx + width[-1] / 2)
for x in range(x0, x1 + 1):
w = kernel((x - kx) / (width[-1] / 2), param[-1])
for b in range(batch_size):
output[b, i] += w * input[b, x % nx]
return output
@nb.jit(nopython=True) # pragma: no cover
def _interpolate2(output, input, coord, width, param):
batch_size, ny, nx = input.shape
npts = coord.shape[0]
for i in range(npts):
kx, ky = coord[i, -1], coord[i, -2]
x0, y0 = (np.ceil(kx - width[-1] / 2),
np.ceil(ky - width[-2] / 2))
x1, y1 = (np.floor(kx + width[-1] / 2),
np.floor(ky + width[-2] / 2))
for y in range(y0, y1 + 1):
wy = kernel((y - ky) / (width[-2] / 2), param[-2])
for x in range(x0, x1 + 1):
w = wy * kernel((x - kx) / (width[-1] / 2), param[-1])
for b in range(batch_size):
output[b, i] += w * input[b, y % ny, x % nx]
return output
@nb.jit(nopython=True) # pragma: no cover
def _interpolate3(output, input, coord, width, param):
batch_size, nz, ny, nx = input.shape
npts = coord.shape[0]
for i in range(npts):
kx, ky, kz = coord[i, -1], coord[i, -2], coord[i, -3]
x0, y0, z0 = (np.ceil(kx - width[-1] / 2),
np.ceil(ky - width[-2] / 2),
np.ceil(kz - width[-3] / 2))
x1, y1, z1 = (np.floor(kx + width[-1] / 2),
np.floor(ky + width[-2] / 2),
np.floor(kz + width[-3] / 2))
for z in range(z0, z1 + 1):
wz = kernel((z - kz) / (width[-3] / 2), param[-3])
for y in range(y0, y1 + 1):
wy = wz * kernel((y - ky) / (width[-2] / 2), param[-2])
for x in range(x0, x1 + 1):
w = wy * kernel((x - kx) / (width[-1] / 2), param[-1])
for b in range(batch_size):
output[b, i] += w * input[
b, z % nz, y % ny, x % nx]
return output
return _interpolate1, _interpolate2, _interpolate3
def _get_gridding(kernel):
if kernel == 'spline':
kernel = _spline_kernel
elif kernel == 'kaiser_bessel':
kernel = _kaiser_bessel_kernel
@nb.jit(nopython=True) # pragma: no cover
def _gridding1(output, input, coord, width, param):
batch_size, nx = output.shape
npts = coord.shape[0]
for i in range(npts):
kx = coord[i, -1]
x0 = np.ceil(kx - width[-1] / 2)
x1 = np.floor(kx + width[-1] / 2)
for x in range(x0, x1 + 1):
w = kernel((x - kx) / (width[-1] / 2), param[-1])
for b in range(batch_size):
output[b, x % nx] += w * input[b, i]
return output
@nb.jit(nopython=True) # pragma: no cover
def _gridding2(output, input, coord, width, param):
batch_size, ny, nx = output.shape
npts = coord.shape[0]
for i in range(npts):
kx, ky = coord[i, -1], coord[i, -2]
x0, y0 = (np.ceil(kx - width[-1] / 2),
np.ceil(ky - width[-2] / 2))
x1, y1 = (np.floor(kx + width[-1] / 2),
np.floor(ky + width[-2] / 2))
for y in range(y0, y1 + 1):
wy = kernel((y - ky) / (width[-2] / 2), param[-2])
for x in range(x0, x1 + 1):
w = wy * kernel((x - kx) / (width[-1] / 2), param[-1])
for b in range(batch_size):
output[b, y % ny, x % nx] += w * input[b, i]
return output
@nb.jit(nopython=True) # pragma: no cover
def _gridding3(output, input, coord, width, param):
batch_size, nz, ny, nx = output.shape
npts = coord.shape[0]
for i in range(npts):
kx, ky, kz = coord[i, -1], coord[i, -2], coord[i, -3]
x0, y0, z0 = (np.ceil(kx - width[-1] / 2),
np.ceil(ky - width[-2] / 2),
np.ceil(kz - width[-3] / 2))
x1, y1, z1 = (np.floor(kx + width[-1] / 2),
np.floor(ky + width[-2] / 2),
np.floor(kz + width[-3] / 2))
for z in range(z0, z1 + 1):
wz = kernel((z - kz) / (width[-3] / 2), param[-3])
for y in range(y0, y1 + 1):
wy = wz * kernel((y - ky) / (width[-2] / 2), param[-2])
for x in range(x0, x1 + 1):
w = wy * kernel(
(x - kx) / (width[-1] / 2), param[-1])
for b in range(batch_size):
output[b, z % nz, y % ny, x % nx] += w * input[
b, i]
return output
return _gridding1, _gridding2, _gridding3
_interpolate = {}
_gridding = {}
for kernel in KERNELS:
_interpolate[kernel] = _get_interpolate(kernel)
_gridding[kernel] = _get_gridding(kernel)
if config.cupy_enabled: # pragma: no cover
import cupy as cp
_spline_kernel_cuda = """
__device__ inline S kernel(S x, S order) {
if (fabsf(x) > 1)
return 0;
if (order == 0)
return 1;
else if (order == 1)
return 1 - fabsf(x);
else if (fabsf(x) > 1 / 3)
return 9 / 8 * (1 - fabsf(x)) * (1 - fabsf(x));
else
return 3 / 4 * (1 - 3 * x * x);
}
"""
_kaiser_bessel_kernel_cuda = """
__device__ inline S kernel(S x, S beta) {
if (fabsf(x) > 1)
return 0;
x = beta * sqrt(1 - x * x);
S t = x / 3.75;
S t2 = t * t;
S t4 = t2 * t2;
S t6 = t4 * t2;
S t8 = t6 * t2;
if (x < 3.75) {
S t10 = t8 * t2;
S t12 = t10 * t2;
return 1 + 3.5156229 * t2 + 3.0899424 * t4 +
1.2067492 * t6 + 0.2659732 * t8 +
0.0360768 * t10 + 0.0045813 * t12;
} else {
S t3 = t * t2;
S t5 = t3 * t2;
S t7 = t5 * t2;
return exp(x) / sqrt(x) * (
0.39894228 + 0.01328592 / t +
0.00225319 / t2 - 0.00157565 / t3 +
0.00916281 / t4 - 0.02057706 / t5 +
0.02635537 / t6 - 0.01647633 / t7 +
0.00392377 / t8);
}
}
"""
mod_cuda = """
__device__ inline int mod(int x, int n) {
return (x % n + n) % n;
}
"""
def _get_interpolate_cuda(kernel):
if kernel == 'spline':
kernel = _spline_kernel_cuda
elif kernel == 'kaiser_bessel':
kernel = _kaiser_bessel_kernel_cuda
_interpolate1_cuda = cp.ElementwiseKernel(
'raw T input, raw S coord, raw S width, raw S param',
'raw T output',
"""
const int ndim = 1;
const int batch_size = input.shape()[0];
const int nx = input.shape()[1];
const int coord_idx[] = {i, 0};
const S kx = coord[coord_idx];
const int x0 = ceil(kx - width[ndim - 1] / 2.0);
const int x1 = floor(kx + width[ndim - 1] / 2.0);
for (int x = x0; x < x1 + 1; x++) {
const S w = kernel(
((S) x - kx) / (width[ndim - 1] / 2.0), param[ndim - 1]);
for (int b = 0; b < batch_size; b++) {
const int input_idx[] = {b, mod(x, nx)};
const T v = (T) w * input[input_idx];
const int output_idx[] = {b, i};
output[output_idx] += v;
}
}
""",
name='interpolate1',
preamble=kernel + mod_cuda,
reduce_dims=False)
_interpolate2_cuda = cp.ElementwiseKernel(
'raw T input, raw S coord, raw S width, raw S param',
'raw T output',
"""
const int ndim = 2;
const int batch_size = input.shape()[0];
const int ny = input.shape()[1];
const int nx = input.shape()[2];
const int coordx_idx[] = {i, 1};
const S kx = coord[coordx_idx];
const int coordy_idx[] = {i, 0};
const S ky = coord[coordy_idx];
const int x0 = ceil(kx - width[ndim - 1] / 2.0);
const int y0 = ceil(ky - width[ndim - 2] / 2.0);
const int x1 = floor(kx + width[ndim - 1] / 2.0);
const int y1 = floor(ky + width[ndim - 2] / 2.0);
for (int y = y0; y < y1 + 1; y++) {
const S wy = kernel(
((S) y - ky) / (width[ndim - 2] / 2.0),
param[ndim - 2]);
for (int x = x0; x < x1 + 1; x++) {
const S w = wy * kernel(
((S) x - kx) / (width[ndim - 1] / 2.0),
param[ndim - 1]);
for (int b = 0; b < batch_size; b++) {
const int input_idx[] = {b, mod(y, ny), mod(x, nx)};
const T v = (T) w * input[input_idx];
const int output_idx[] = {b, i};
output[output_idx] += v;
}
}
}
""",
name='interpolate2',
preamble=kernel + mod_cuda,
reduce_dims=False)
_interpolate3_cuda = cp.ElementwiseKernel(
'raw T input, raw S coord, raw S width, raw S param', 'raw T output', """
const int ndim = 3;
const int batch_size = input.shape()[0];
const int nz = input.shape()[1];
const int ny = input.shape()[2];
const int nx = input.shape()[3];
const int coordz_idx[] = {i, 0};
const S kz = coord[coordz_idx];
const int coordy_idx[] = {i, 1};
const S ky = coord[coordy_idx];
const int coordx_idx[] = {i, 2};
const S kx = coord[coordx_idx];
const int x0 = ceil(kx - width[ndim - 1] / 2.0);
const int y0 = ceil(ky - width[ndim - 2] / 2.0);
const int z0 = ceil(kz - width[ndim - 3] / 2.0);
const int x1 = floor(kx + width[ndim - 1] / 2.0);
const int y1 = floor(ky + width[ndim - 2] / 2.0);
const int z1 = floor(kz + width[ndim - 3] / 2.0);
for (int z = z0; z < z1 + 1; z++) {
const S wz = kernel(
((S) z - kz) / (width[ndim - 3] / 2.0),
param[ndim - 3]);
for (int y = y0; y < y1 + 1; y++) {
const S wy = wz * kernel(
((S) y - ky) / (width[ndim - 2] / 2.0),
param[ndim - 2]);
for (int x = x0; x < x1 + 1; x++) {
const S w = wy * kernel(
((S) x - kx) / (width[ndim - 1] / 2.0),
param[ndim - 1]);
for (int b = 0; b < batch_size; b++) {
const int input_idx[] = {b, mod(z, nz), mod(y, ny),
mod(x, nx)};
const T v = (T) w * input[input_idx];
const int output_idx[] = {b, i};
output[output_idx] += v;
}
}
}
}
""", name='interpolate3', preamble=kernel + mod_cuda,
reduce_dims=False)
return _interpolate1_cuda, _interpolate2_cuda, _interpolate3_cuda
def _get_gridding_cuda(kernel):
if kernel == 'spline':
kernel = _spline_kernel_cuda
elif kernel == 'kaiser_bessel':
kernel = _kaiser_bessel_kernel_cuda
_gridding1_cuda = cp.ElementwiseKernel(
'raw T input, raw S coord, raw S width, raw S param',
'raw T output',
"""
const int ndim = 1;
const int batch_size = output.shape()[0];
const int nx = output.shape()[1];
const int coord_idx[] = {i, 0};
const S kx = coord[coord_idx];
const int x0 = ceil(kx - width[ndim - 1] / 2.0);
const int x1 = floor(kx + width[ndim - 1] / 2.0);
for (int x = x0; x < x1 + 1; x++) {
const S w = kernel(
((S) x - kx) / (width[ndim - 1] / 2.0), param[ndim - 1]);
for (int b = 0; b < batch_size; b++) {
const int input_idx[] = {b, i};
const T v = (T) w * input[input_idx];
const int output_idx[] = {b, mod(x, nx)};
atomicAdd(&output[output_idx], v);
}
}
""",
name='gridding1',
preamble=kernel + mod_cuda,
reduce_dims=False)
_gridding2_cuda = cp.ElementwiseKernel(
'raw T input, raw S coord, raw S width, raw S param', 'raw T output', """
const int ndim = 2;
const int batch_size = output.shape()[0];
const int ny = output.shape()[1];
const int nx = output.shape()[2];
const int coordx_idx[] = {i, 1};
const S kx = coord[coordx_idx];
const int coordy_idx[] = {i, 0};
const S ky = coord[coordy_idx];
const int x0 = ceil(kx - width[ndim - 1] / 2.0);
const int y0 = ceil(ky - width[ndim - 2] / 2.0);
const int x1 = floor(kx + width[ndim - 1] / 2.0);
const int y1 = floor(ky + width[ndim - 2] / 2.0);
for (int y = y0; y < y1 + 1; y++) {
const S wy = kernel(
((S) y - ky) / (width[ndim - 2] / 2.0),
param[ndim - 2]);
for (int x = x0; x < x1 + 1; x++) {
const S w = wy * kernel(
((S) x - kx) / (width[ndim - 1] / 2.0),
param[ndim - 1]);
for (int b = 0; b < batch_size; b++) {
const int input_idx[] = {b, i};
const T v = (T) w * input[input_idx];
const int output_idx[] = {b, mod(y, ny), mod(x, nx)};
atomicAdd(&output[output_idx], v);
}
}
}
""", name='gridding2', preamble=kernel + mod_cuda,
reduce_dims=False)
_gridding3_cuda = cp.ElementwiseKernel(
'raw T input, raw S coord, raw S width, raw S param', 'raw T output', """
const int ndim = 3;
const int batch_size = output.shape()[0];
const int nz = output.shape()[1];
const int ny = output.shape()[2];
const int nx = output.shape()[3];
const int coordz_idx[] = {i, 0};
const S kz = coord[coordz_idx];
const int coordy_idx[] = {i, 1};
const S ky = coord[coordy_idx];
const int coordx_idx[] = {i, 2};
const S kx = coord[coordx_idx];
const int x0 = ceil(kx - width[ndim - 1] / 2.0);
const int y0 = ceil(ky - width[ndim - 2] / 2.0);
const int z0 = ceil(kz - width[ndim - 3] / 2.0);
const int x1 = floor(kx + width[ndim - 1] / 2.0);
const int y1 = floor(ky + width[ndim - 2] / 2.0);
const int z1 = floor(kz + width[ndim - 3] / 2.0);
for (int z = z0; z < z1 + 1; z++) {
const S wz = kernel(
((S) z - kz) / (width[ndim - 3] / 2.0),
param[ndim - 3]);
for (int y = y0; y < y1 + 1; y++) {
const S wy = wz * kernel(
((S) y - ky) / (width[ndim - 2] / 2.0),
param[ndim - 2]);
for (int x = x0; x < x1 + 1; x++) {
const S w = wy * kernel(
((S) x - kx) / (width[ndim - 1] / 2.0),
param[ndim - 1]);
for (int b = 0; b < batch_size; b++) {
const int input_idx[] = {b, i};
const T v = (T) w * input[input_idx];
const int output_idx[] = {
b, mod(z, nz), mod(y, ny), mod(x, nx)};
atomicAdd(&output[output_idx], v);
}
}
}
}
""", name='gridding3', preamble=kernel + mod_cuda,
reduce_dims=False)
return _gridding1_cuda, _gridding2_cuda, _gridding3_cuda
def _get_gridding_cuda_complex(kernel):
if kernel == 'spline':
kernel = _spline_kernel_cuda
elif kernel == 'kaiser_bessel':
kernel = _kaiser_bessel_kernel_cuda
_gridding1_cuda_complex = cp.ElementwiseKernel(
'raw T input, raw S coord, raw S width, raw S param',
'raw T output',
"""
const int ndim = 1;
const int batch_size = output.shape()[0];
const int nx = output.shape()[1];
const int coord_idx[] = {i, 0};
const S kx = coord[coord_idx];
const int x0 = ceil(kx - width[ndim - 1] / 2.0);
const int x1 = floor(kx + width[ndim - 1] / 2.0);
for (int x = x0; x < x1 + 1; x++) {
const S w = kernel(
((S) x - kx) / (width[ndim - 1] / 2.0), param[ndim - 1]);
for (int b = 0; b < batch_size; b++) {
const int input_idx[] = {b, i};
const T v = (T) w * input[input_idx];
const int output_idx[] = {b, mod(x, nx)};
atomicAdd(
reinterpret_cast<T::value_type*>(
&(output[output_idx])), v.real());
atomicAdd(
reinterpret_cast<T::value_type*>(
&(output[output_idx])) + 1, v.imag());
}
}
""",
name='gridding1_complex',
preamble=kernel + mod_cuda,
reduce_dims=False)
_gridding2_cuda_complex = cp.ElementwiseKernel(
'raw T input, raw S coord, raw S width, raw S param',
'raw T output',
"""
const int ndim = 2;
const int batch_size = output.shape()[0];
const int ny = output.shape()[1];
const int nx = output.shape()[2];
const int coordx_idx[] = {i, 1};
const S kx = coord[coordx_idx];
const int coordy_idx[] = {i, 0};
const S ky = coord[coordy_idx];
const int x0 = ceil(kx - width[ndim - 1] / 2.0);
const int y0 = ceil(ky - width[ndim - 2] / 2.0);
const int x1 = floor(kx + width[ndim - 1] / 2.0);
const int y1 = floor(ky + width[ndim - 2] / 2.0);
for (int y = y0; y < y1 + 1; y++) {
const S wy = kernel(
((S) y - ky) / (width[ndim - 2] / 2.0),
param[ndim - 2]);
for (int x = x0; x < x1 + 1; x++) {
const S w = wy * kernel(
((S) x - kx) / (width[ndim - 1] / 2.0),
param[ndim - 1]);
for (int b = 0; b < batch_size; b++) {
const int input_idx[] = {b, i};
const T v = (T) w * input[input_idx];
const int output_idx[] = {b, mod(y, ny), mod(x, nx)};
atomicAdd(reinterpret_cast<T::value_type*>(
&(output[output_idx])), v.real());
atomicAdd(reinterpret_cast<T::value_type*>(
&(output[output_idx])) + 1, v.imag());
}
}
}
""",
name='gridding2_complex',
preamble=kernel + mod_cuda,
reduce_dims=False)
_gridding3_cuda_complex = cp.ElementwiseKernel(
'raw T input, raw S coord, raw S width, raw S param',
'raw T output',
"""
const int ndim = 3;
const int batch_size = output.shape()[0];
const int nz = output.shape()[1];
const int ny = output.shape()[2];
const int nx = output.shape()[3];
const int coordz_idx[] = {i, 0};
const S kz = coord[coordz_idx];
const int coordy_idx[] = {i, 1};
const S ky = coord[coordy_idx];
const int coordx_idx[] = {i, 2};
const S kx = coord[coordx_idx];
const int x0 = ceil(kx - width[ndim - 1] / 2.0);
const int y0 = ceil(ky - width[ndim - 2] / 2.0);
const int z0 = ceil(kz - width[ndim - 3] / 2.0);
const int x1 = floor(kx + width[ndim - 1] / 2.0);
const int y1 = floor(ky + width[ndim - 2] / 2.0);
const int z1 = floor(kz + width[ndim - 3] / 2.0);
for (int z = z0; z < z1 + 1; z++) {
const S wz = kernel(
((S) z - kz) / (width[ndim - 3] / 2.0),
param[ndim - 3]);
for (int y = y0; y < y1 + 1; y++) {
const S wy = wz * kernel(
((S) y - ky) / (width[ndim - 2] / 2.0),
param[ndim - 2]);
for (int x = x0; x < x1 + 1; x++) {
const S w = wy * kernel(
((S) x - kx) / (width[ndim - 1] / 2.0),
param[ndim - 1]);
for (int b = 0; b < batch_size; b++) {
const int input_idx[] = {b, i};
const T v = (T) w * input[input_idx];
const int output_idx[] = {
b, mod(z, nz), mod(y, ny), mod(x, nx)};
atomicAdd(reinterpret_cast<T::value_type*>(
&(output[output_idx])), v.real());
atomicAdd(reinterpret_cast<T::value_type*>(
&(output[output_idx])) + 1, v.imag());
}
}
}
}
""",
name='gridding3_complex',
preamble=kernel + mod_cuda,
reduce_dims=False)
return _gridding1_cuda_complex, _gridding2_cuda_complex, \
_gridding3_cuda_complex
_interpolate_cuda = {}
_gridding_cuda = {}
_gridding_cuda_complex = {}
for kernel in KERNELS:
_interpolate_cuda[kernel] = _get_interpolate_cuda(kernel)
_gridding_cuda[kernel] = _get_gridding_cuda(kernel)
_gridding_cuda_complex[kernel] = _get_gridding_cuda_complex(kernel)