Source code for sigpy.interp

# -*- coding: utf-8 -*-
"""Interpolation functions.
"""
import numpy as np
import numba as nb

from sigpy import backend, config, util


__all__ = ['interpolate', 'gridding']


KERNELS = ['spline', 'kaiser_bessel']


[docs]def interpolate(input, coord, kernel='spline', width=2, param=1): r"""Interpolation from array to points specified by coordinates. Let :math:`x` be the input, :math:`y` be the output, :math:`c` be the coordinates, :math:`W` be the kernel width, and :math:`K` be the interpolation kernel, then the function computes, .. math :: y[j] = \sum_{i : \| i - c[j] \|_\infty \leq W / 2} K\left(\frac{i - c[j]}{W / 2}\right) x[i] There are two types of kernels: 'spline' and 'kaiser_bessel'. 'spline' uses the cardinal B-spline functions as kernels. The order of the spline can be specified using param. For example, param=1 performs linear interpolation. Concretely, for param=0, :math:`K(x) = 1`, for param=1, :math:`K(x) = 1 - |x|`, and for param=2, :math:`K(x) = \frac{9}{8} (1 - |x|)^2` for :math:`|x| > \frac{1}{3}` and :math:`K(x) = \frac{3}{4} (1 - 3 x^2)` for :math:`|x| < \frac{1}{3}`. These function expressions are derived from the reference wikipedia page by shifting and scaling the range to -1 to 1. When the coordinates specifies a uniformly spaced grid, it is recommended to use the original scaling with width=param + 1 so that the interpolation weights add up to one. 'kaiser_bessel' uses the Kaiser-Bessel function as kernel. Concretely, :math:`K(x) = I_0(\beta \sqrt{1 - x^2})`, where :math:`I_0` is the modified Bessel function of the first kind. The beta parameter can be specified with param. The modified Bessel function of the first kind is approximated using the power series, following the reference. Args: input (array): Input array of shape. coord (array): Coordinate array of shape [..., ndim] width (float or tuple of floats): Interpolation kernel full-width. kernel (str): Interpolation kernel, {'spline', 'kaiser_bessel'}. param (float or tuple of floats): Kernel parameter. Returns: output (array): Output array. References: https://en.wikipedia.org/wiki/Spline_wavelet#Cardinal_B-splines_of_small_orders http://people.math.sfu.ca/~cbm/aands/page_378.htm """ ndim = coord.shape[-1] batch_shape = input.shape[:-ndim] batch_size = util.prod(batch_shape) pts_shape = coord.shape[:-1] npts = util.prod(pts_shape) xp = backend.get_array_module(input) input = input.reshape([batch_size] + list(input.shape[-ndim:])) coord = coord.reshape([npts, ndim]) output = xp.zeros([batch_size, npts], dtype=input.dtype) if np.isscalar(param): param = xp.array([param] * ndim, coord.dtype) else: param = xp.array(param, coord.dtype) if np.isscalar(width): width = xp.array([width] * ndim, coord.dtype) else: width = xp.array(width, coord.dtype) if xp == np: _interpolate[kernel][ndim - 1](output, input, coord, width, param) else: # pragma: no cover _interpolate_cuda[kernel][ndim - 1]( input, coord, width, param, output, size=npts) return output.reshape(batch_shape + pts_shape)
[docs]def gridding(input, coord, shape, kernel="spline", width=2, param=1): r"""Gridding of points specified by coordinates to array. Let :math:`y` be the input, :math:`x` be the output, :math:`c` be the coordinates, :math:`W` be the kernel width, and :math:`K` be the interpolation kernel, then the function computes, .. math :: x[i] = \sum_{j : \| i - c[j] \|_\infty \leq W / 2} K\left(\frac{i - c[j]}{W / 2}\right) y[j] There are two types of kernels: 'spline' and 'kaiser_bessel'. 'spline' uses the cardinal B-spline functions as kernels. The order of the spline can be specified using param. For example, param=1 performs linear interpolation. Concretely, for param=0, :math:`K(x) = 1`, for param=1, :math:`K(x) = 1 - |x|`, and for param=2, :math:`K(x) = \frac{9}{8} (1 - |x|)^2` for :math:`|x| > \frac{1}{3}` and :math:`K(x) = \frac{3}{4} (1 - 3 x^2)` for :math:`|x| < \frac{1}{3}`. These function expressions are derived from the reference wikipedia page by shifting and scaling the range to -1 to 1. When the coordinates specifies a uniformly spaced grid, it is recommended to use the original scaling with width=param + 1 so that the interpolation weights add up to one. 'kaiser_bessel' uses the Kaiser-Bessel function as kernel. Concretely, :math:`K(x) = I_0(\beta \sqrt{1 - x^2})`, where :math:`I_0` is the modified Bessel function of the first kind. The beta parameter can be specified with param. The modified Bessel function of the first kind is approximated using the power series, following the reference. Args: input (array): Input array. coord (array): Coordinate array of shape [..., ndim] width (float or tuple of floats): Interpolation kernel full-width. kernel (str): Interpolation kernel, {"spline", "kaiser_bessel"}. param (float or tuple of floats): Kernel parameter. Returns: output (array): Output array. References: https://en.wikipedia.org/wiki/Spline_wavelet#Cardinal_B-splines_of_small_orders http://people.math.sfu.ca/~cbm/aands/page_378.htm """ ndim = coord.shape[-1] batch_shape = shape[:-ndim] batch_size = util.prod(batch_shape) pts_shape = coord.shape[:-1] npts = util.prod(pts_shape) xp = backend.get_array_module(input) isreal = np.issubdtype(input.dtype, np.floating) input = input.reshape([batch_size, npts]) coord = coord.reshape([npts, ndim]) output = xp.zeros([batch_size] + list(shape[-ndim:]), dtype=input.dtype) if np.isscalar(param): param = xp.array([param] * ndim, coord.dtype) else: param = xp.array(param, coord.dtype) if np.isscalar(width): width = xp.array([width] * ndim, coord.dtype) else: width = xp.array(width, coord.dtype) if xp == np: _gridding[kernel][ndim - 1](output, input, coord, width, param) else: # pragma: no cover if isreal: _gridding_cuda[kernel][ndim - 1]( input, coord, width, param, output, size=npts) else: _gridding_cuda_complex[kernel][ndim - 1]( input, coord, width, param, output, size=npts) return output.reshape(shape)
@nb.jit(nopython=True, cache=True) # pragma: no cover def _spline_kernel(x, order): if abs(x) > 1: return 0 if order == 0: return 1 elif order == 1: return 1 - abs(x) elif order == 2: if abs(x) > 1 / 3: return 9 / 8 * (1 - abs(x))**2 else: return 3 / 4 * (1 - 3 * x**2) @nb.jit(nopython=True, cache=True) # pragma: no cover def _kaiser_bessel_kernel(x, beta): if abs(x) > 1: return 0 x = beta * (1 - x**2)**0.5 t = x / 3.75 if x < 3.75: return 1 + 3.5156229 * t**2 + 3.0899424 * t**4 +\ 1.2067492 * t**6 + 0.2659732 * t**8 +\ 0.0360768 * t**10 + 0.0045813 * t**12 else: return x**-0.5 * np.exp(x) * ( 0.39894228 + 0.01328592 * t**-1 + 0.00225319 * t**-2 - 0.00157565 * t**-3 + 0.00916281 * t**-4 - 0.02057706 * t**-5 + 0.02635537 * t**-6 - 0.01647633 * t**-7 + 0.00392377 * t**-8) def _get_interpolate(kernel): if kernel == 'spline': kernel = _spline_kernel elif kernel == 'kaiser_bessel': kernel = _kaiser_bessel_kernel @nb.jit(nopython=True) # pragma: no cover def _interpolate1(output, input, coord, width, param): batch_size, nx = input.shape npts = coord.shape[0] for i in range(npts): kx = coord[i, -1] x0 = np.ceil(kx - width[-1] / 2) x1 = np.floor(kx + width[-1] / 2) for x in range(x0, x1 + 1): w = kernel((x - kx) / (width[-1] / 2), param[-1]) for b in range(batch_size): output[b, i] += w * input[b, x % nx] return output @nb.jit(nopython=True) # pragma: no cover def _interpolate2(output, input, coord, width, param): batch_size, ny, nx = input.shape npts = coord.shape[0] for i in range(npts): kx, ky = coord[i, -1], coord[i, -2] x0, y0 = (np.ceil(kx - width[-1] / 2), np.ceil(ky - width[-2] / 2)) x1, y1 = (np.floor(kx + width[-1] / 2), np.floor(ky + width[-2] / 2)) for y in range(y0, y1 + 1): wy = kernel((y - ky) / (width[-2] / 2), param[-2]) for x in range(x0, x1 + 1): w = wy * kernel((x - kx) / (width[-1] / 2), param[-1]) for b in range(batch_size): output[b, i] += w * input[b, y % ny, x % nx] return output @nb.jit(nopython=True) # pragma: no cover def _interpolate3(output, input, coord, width, param): batch_size, nz, ny, nx = input.shape npts = coord.shape[0] for i in range(npts): kx, ky, kz = coord[i, -1], coord[i, -2], coord[i, -3] x0, y0, z0 = (np.ceil(kx - width[-1] / 2), np.ceil(ky - width[-2] / 2), np.ceil(kz - width[-3] / 2)) x1, y1, z1 = (np.floor(kx + width[-1] / 2), np.floor(ky + width[-2] / 2), np.floor(kz + width[-3] / 2)) for z in range(z0, z1 + 1): wz = kernel((z - kz) / (width[-3] / 2), param[-3]) for y in range(y0, y1 + 1): wy = wz * kernel((y - ky) / (width[-2] / 2), param[-2]) for x in range(x0, x1 + 1): w = wy * kernel((x - kx) / (width[-1] / 2), param[-1]) for b in range(batch_size): output[b, i] += w * input[ b, z % nz, y % ny, x % nx] return output return _interpolate1, _interpolate2, _interpolate3 def _get_gridding(kernel): if kernel == 'spline': kernel = _spline_kernel elif kernel == 'kaiser_bessel': kernel = _kaiser_bessel_kernel @nb.jit(nopython=True) # pragma: no cover def _gridding1(output, input, coord, width, param): batch_size, nx = output.shape npts = coord.shape[0] for i in range(npts): kx = coord[i, -1] x0 = np.ceil(kx - width[-1] / 2) x1 = np.floor(kx + width[-1] / 2) for x in range(x0, x1 + 1): w = kernel((x - kx) / (width[-1] / 2), param[-1]) for b in range(batch_size): output[b, x % nx] += w * input[b, i] return output @nb.jit(nopython=True) # pragma: no cover def _gridding2(output, input, coord, width, param): batch_size, ny, nx = output.shape npts = coord.shape[0] for i in range(npts): kx, ky = coord[i, -1], coord[i, -2] x0, y0 = (np.ceil(kx - width[-1] / 2), np.ceil(ky - width[-2] / 2)) x1, y1 = (np.floor(kx + width[-1] / 2), np.floor(ky + width[-2] / 2)) for y in range(y0, y1 + 1): wy = kernel((y - ky) / (width[-2] / 2), param[-2]) for x in range(x0, x1 + 1): w = wy * kernel((x - kx) / (width[-1] / 2), param[-1]) for b in range(batch_size): output[b, y % ny, x % nx] += w * input[b, i] return output @nb.jit(nopython=True) # pragma: no cover def _gridding3(output, input, coord, width, param): batch_size, nz, ny, nx = output.shape npts = coord.shape[0] for i in range(npts): kx, ky, kz = coord[i, -1], coord[i, -2], coord[i, -3] x0, y0, z0 = (np.ceil(kx - width[-1] / 2), np.ceil(ky - width[-2] / 2), np.ceil(kz - width[-3] / 2)) x1, y1, z1 = (np.floor(kx + width[-1] / 2), np.floor(ky + width[-2] / 2), np.floor(kz + width[-3] / 2)) for z in range(z0, z1 + 1): wz = kernel((z - kz) / (width[-3] / 2), param[-3]) for y in range(y0, y1 + 1): wy = wz * kernel((y - ky) / (width[-2] / 2), param[-2]) for x in range(x0, x1 + 1): w = wy * kernel( (x - kx) / (width[-1] / 2), param[-1]) for b in range(batch_size): output[b, z % nz, y % ny, x % nx] += w * input[ b, i] return output return _gridding1, _gridding2, _gridding3 _interpolate = {} _gridding = {} for kernel in KERNELS: _interpolate[kernel] = _get_interpolate(kernel) _gridding[kernel] = _get_gridding(kernel) if config.cupy_enabled: # pragma: no cover import cupy as cp _spline_kernel_cuda = """ __device__ inline S kernel(S x, S order) { if (fabsf(x) > 1) return 0; if (order == 0) return 1; else if (order == 1) return 1 - fabsf(x); else if (fabsf(x) > 1 / 3) return 9 / 8 * (1 - fabsf(x)) * (1 - fabsf(x)); else return 3 / 4 * (1 - 3 * x * x); } """ _kaiser_bessel_kernel_cuda = """ __device__ inline S kernel(S x, S beta) { if (fabsf(x) > 1) return 0; x = beta * sqrt(1 - x * x); S t = x / 3.75; S t2 = t * t; S t4 = t2 * t2; S t6 = t4 * t2; S t8 = t6 * t2; if (x < 3.75) { S t10 = t8 * t2; S t12 = t10 * t2; return 1 + 3.5156229 * t2 + 3.0899424 * t4 + 1.2067492 * t6 + 0.2659732 * t8 + 0.0360768 * t10 + 0.0045813 * t12; } else { S t3 = t * t2; S t5 = t3 * t2; S t7 = t5 * t2; return exp(x) / sqrt(x) * ( 0.39894228 + 0.01328592 / t + 0.00225319 / t2 - 0.00157565 / t3 + 0.00916281 / t4 - 0.02057706 / t5 + 0.02635537 / t6 - 0.01647633 / t7 + 0.00392377 / t8); } } """ mod_cuda = """ __device__ inline int mod(int x, int n) { return (x % n + n) % n; } """ def _get_interpolate_cuda(kernel): if kernel == 'spline': kernel = _spline_kernel_cuda elif kernel == 'kaiser_bessel': kernel = _kaiser_bessel_kernel_cuda _interpolate1_cuda = cp.ElementwiseKernel( 'raw T input, raw S coord, raw S width, raw S param', 'raw T output', """ const int ndim = 1; const int batch_size = input.shape()[0]; const int nx = input.shape()[1]; const int coord_idx[] = {i, 0}; const S kx = coord[coord_idx]; const int x0 = ceil(kx - width[ndim - 1] / 2.0); const int x1 = floor(kx + width[ndim - 1] / 2.0); for (int x = x0; x < x1 + 1; x++) { const S w = kernel( ((S) x - kx) / (width[ndim - 1] / 2.0), param[ndim - 1]); for (int b = 0; b < batch_size; b++) { const int input_idx[] = {b, mod(x, nx)}; const T v = (T) w * input[input_idx]; const int output_idx[] = {b, i}; output[output_idx] += v; } } """, name='interpolate1', preamble=kernel + mod_cuda, reduce_dims=False) _interpolate2_cuda = cp.ElementwiseKernel( 'raw T input, raw S coord, raw S width, raw S param', 'raw T output', """ const int ndim = 2; const int batch_size = input.shape()[0]; const int ny = input.shape()[1]; const int nx = input.shape()[2]; const int coordx_idx[] = {i, 1}; const S kx = coord[coordx_idx]; const int coordy_idx[] = {i, 0}; const S ky = coord[coordy_idx]; const int x0 = ceil(kx - width[ndim - 1] / 2.0); const int y0 = ceil(ky - width[ndim - 2] / 2.0); const int x1 = floor(kx + width[ndim - 1] / 2.0); const int y1 = floor(ky + width[ndim - 2] / 2.0); for (int y = y0; y < y1 + 1; y++) { const S wy = kernel( ((S) y - ky) / (width[ndim - 2] / 2.0), param[ndim - 2]); for (int x = x0; x < x1 + 1; x++) { const S w = wy * kernel( ((S) x - kx) / (width[ndim - 1] / 2.0), param[ndim - 1]); for (int b = 0; b < batch_size; b++) { const int input_idx[] = {b, mod(y, ny), mod(x, nx)}; const T v = (T) w * input[input_idx]; const int output_idx[] = {b, i}; output[output_idx] += v; } } } """, name='interpolate2', preamble=kernel + mod_cuda, reduce_dims=False) _interpolate3_cuda = cp.ElementwiseKernel( 'raw T input, raw S coord, raw S width, raw S param', 'raw T output', """ const int ndim = 3; const int batch_size = input.shape()[0]; const int nz = input.shape()[1]; const int ny = input.shape()[2]; const int nx = input.shape()[3]; const int coordz_idx[] = {i, 0}; const S kz = coord[coordz_idx]; const int coordy_idx[] = {i, 1}; const S ky = coord[coordy_idx]; const int coordx_idx[] = {i, 2}; const S kx = coord[coordx_idx]; const int x0 = ceil(kx - width[ndim - 1] / 2.0); const int y0 = ceil(ky - width[ndim - 2] / 2.0); const int z0 = ceil(kz - width[ndim - 3] / 2.0); const int x1 = floor(kx + width[ndim - 1] / 2.0); const int y1 = floor(ky + width[ndim - 2] / 2.0); const int z1 = floor(kz + width[ndim - 3] / 2.0); for (int z = z0; z < z1 + 1; z++) { const S wz = kernel( ((S) z - kz) / (width[ndim - 3] / 2.0), param[ndim - 3]); for (int y = y0; y < y1 + 1; y++) { const S wy = wz * kernel( ((S) y - ky) / (width[ndim - 2] / 2.0), param[ndim - 2]); for (int x = x0; x < x1 + 1; x++) { const S w = wy * kernel( ((S) x - kx) / (width[ndim - 1] / 2.0), param[ndim - 1]); for (int b = 0; b < batch_size; b++) { const int input_idx[] = {b, mod(z, nz), mod(y, ny), mod(x, nx)}; const T v = (T) w * input[input_idx]; const int output_idx[] = {b, i}; output[output_idx] += v; } } } } """, name='interpolate3', preamble=kernel + mod_cuda, reduce_dims=False) return _interpolate1_cuda, _interpolate2_cuda, _interpolate3_cuda def _get_gridding_cuda(kernel): if kernel == 'spline': kernel = _spline_kernel_cuda elif kernel == 'kaiser_bessel': kernel = _kaiser_bessel_kernel_cuda _gridding1_cuda = cp.ElementwiseKernel( 'raw T input, raw S coord, raw S width, raw S param', 'raw T output', """ const int ndim = 1; const int batch_size = output.shape()[0]; const int nx = output.shape()[1]; const int coord_idx[] = {i, 0}; const S kx = coord[coord_idx]; const int x0 = ceil(kx - width[ndim - 1] / 2.0); const int x1 = floor(kx + width[ndim - 1] / 2.0); for (int x = x0; x < x1 + 1; x++) { const S w = kernel( ((S) x - kx) / (width[ndim - 1] / 2.0), param[ndim - 1]); for (int b = 0; b < batch_size; b++) { const int input_idx[] = {b, i}; const T v = (T) w * input[input_idx]; const int output_idx[] = {b, mod(x, nx)}; atomicAdd(&output[output_idx], v); } } """, name='gridding1', preamble=kernel + mod_cuda, reduce_dims=False) _gridding2_cuda = cp.ElementwiseKernel( 'raw T input, raw S coord, raw S width, raw S param', 'raw T output', """ const int ndim = 2; const int batch_size = output.shape()[0]; const int ny = output.shape()[1]; const int nx = output.shape()[2]; const int coordx_idx[] = {i, 1}; const S kx = coord[coordx_idx]; const int coordy_idx[] = {i, 0}; const S ky = coord[coordy_idx]; const int x0 = ceil(kx - width[ndim - 1] / 2.0); const int y0 = ceil(ky - width[ndim - 2] / 2.0); const int x1 = floor(kx + width[ndim - 1] / 2.0); const int y1 = floor(ky + width[ndim - 2] / 2.0); for (int y = y0; y < y1 + 1; y++) { const S wy = kernel( ((S) y - ky) / (width[ndim - 2] / 2.0), param[ndim - 2]); for (int x = x0; x < x1 + 1; x++) { const S w = wy * kernel( ((S) x - kx) / (width[ndim - 1] / 2.0), param[ndim - 1]); for (int b = 0; b < batch_size; b++) { const int input_idx[] = {b, i}; const T v = (T) w * input[input_idx]; const int output_idx[] = {b, mod(y, ny), mod(x, nx)}; atomicAdd(&output[output_idx], v); } } } """, name='gridding2', preamble=kernel + mod_cuda, reduce_dims=False) _gridding3_cuda = cp.ElementwiseKernel( 'raw T input, raw S coord, raw S width, raw S param', 'raw T output', """ const int ndim = 3; const int batch_size = output.shape()[0]; const int nz = output.shape()[1]; const int ny = output.shape()[2]; const int nx = output.shape()[3]; const int coordz_idx[] = {i, 0}; const S kz = coord[coordz_idx]; const int coordy_idx[] = {i, 1}; const S ky = coord[coordy_idx]; const int coordx_idx[] = {i, 2}; const S kx = coord[coordx_idx]; const int x0 = ceil(kx - width[ndim - 1] / 2.0); const int y0 = ceil(ky - width[ndim - 2] / 2.0); const int z0 = ceil(kz - width[ndim - 3] / 2.0); const int x1 = floor(kx + width[ndim - 1] / 2.0); const int y1 = floor(ky + width[ndim - 2] / 2.0); const int z1 = floor(kz + width[ndim - 3] / 2.0); for (int z = z0; z < z1 + 1; z++) { const S wz = kernel( ((S) z - kz) / (width[ndim - 3] / 2.0), param[ndim - 3]); for (int y = y0; y < y1 + 1; y++) { const S wy = wz * kernel( ((S) y - ky) / (width[ndim - 2] / 2.0), param[ndim - 2]); for (int x = x0; x < x1 + 1; x++) { const S w = wy * kernel( ((S) x - kx) / (width[ndim - 1] / 2.0), param[ndim - 1]); for (int b = 0; b < batch_size; b++) { const int input_idx[] = {b, i}; const T v = (T) w * input[input_idx]; const int output_idx[] = { b, mod(z, nz), mod(y, ny), mod(x, nx)}; atomicAdd(&output[output_idx], v); } } } } """, name='gridding3', preamble=kernel + mod_cuda, reduce_dims=False) return _gridding1_cuda, _gridding2_cuda, _gridding3_cuda def _get_gridding_cuda_complex(kernel): if kernel == 'spline': kernel = _spline_kernel_cuda elif kernel == 'kaiser_bessel': kernel = _kaiser_bessel_kernel_cuda _gridding1_cuda_complex = cp.ElementwiseKernel( 'raw T input, raw S coord, raw S width, raw S param', 'raw T output', """ const int ndim = 1; const int batch_size = output.shape()[0]; const int nx = output.shape()[1]; const int coord_idx[] = {i, 0}; const S kx = coord[coord_idx]; const int x0 = ceil(kx - width[ndim - 1] / 2.0); const int x1 = floor(kx + width[ndim - 1] / 2.0); for (int x = x0; x < x1 + 1; x++) { const S w = kernel( ((S) x - kx) / (width[ndim - 1] / 2.0), param[ndim - 1]); for (int b = 0; b < batch_size; b++) { const int input_idx[] = {b, i}; const T v = (T) w * input[input_idx]; const int output_idx[] = {b, mod(x, nx)}; atomicAdd( reinterpret_cast<T::value_type*>( &(output[output_idx])), v.real()); atomicAdd( reinterpret_cast<T::value_type*>( &(output[output_idx])) + 1, v.imag()); } } """, name='gridding1_complex', preamble=kernel + mod_cuda, reduce_dims=False) _gridding2_cuda_complex = cp.ElementwiseKernel( 'raw T input, raw S coord, raw S width, raw S param', 'raw T output', """ const int ndim = 2; const int batch_size = output.shape()[0]; const int ny = output.shape()[1]; const int nx = output.shape()[2]; const int coordx_idx[] = {i, 1}; const S kx = coord[coordx_idx]; const int coordy_idx[] = {i, 0}; const S ky = coord[coordy_idx]; const int x0 = ceil(kx - width[ndim - 1] / 2.0); const int y0 = ceil(ky - width[ndim - 2] / 2.0); const int x1 = floor(kx + width[ndim - 1] / 2.0); const int y1 = floor(ky + width[ndim - 2] / 2.0); for (int y = y0; y < y1 + 1; y++) { const S wy = kernel( ((S) y - ky) / (width[ndim - 2] / 2.0), param[ndim - 2]); for (int x = x0; x < x1 + 1; x++) { const S w = wy * kernel( ((S) x - kx) / (width[ndim - 1] / 2.0), param[ndim - 1]); for (int b = 0; b < batch_size; b++) { const int input_idx[] = {b, i}; const T v = (T) w * input[input_idx]; const int output_idx[] = {b, mod(y, ny), mod(x, nx)}; atomicAdd(reinterpret_cast<T::value_type*>( &(output[output_idx])), v.real()); atomicAdd(reinterpret_cast<T::value_type*>( &(output[output_idx])) + 1, v.imag()); } } } """, name='gridding2_complex', preamble=kernel + mod_cuda, reduce_dims=False) _gridding3_cuda_complex = cp.ElementwiseKernel( 'raw T input, raw S coord, raw S width, raw S param', 'raw T output', """ const int ndim = 3; const int batch_size = output.shape()[0]; const int nz = output.shape()[1]; const int ny = output.shape()[2]; const int nx = output.shape()[3]; const int coordz_idx[] = {i, 0}; const S kz = coord[coordz_idx]; const int coordy_idx[] = {i, 1}; const S ky = coord[coordy_idx]; const int coordx_idx[] = {i, 2}; const S kx = coord[coordx_idx]; const int x0 = ceil(kx - width[ndim - 1] / 2.0); const int y0 = ceil(ky - width[ndim - 2] / 2.0); const int z0 = ceil(kz - width[ndim - 3] / 2.0); const int x1 = floor(kx + width[ndim - 1] / 2.0); const int y1 = floor(ky + width[ndim - 2] / 2.0); const int z1 = floor(kz + width[ndim - 3] / 2.0); for (int z = z0; z < z1 + 1; z++) { const S wz = kernel( ((S) z - kz) / (width[ndim - 3] / 2.0), param[ndim - 3]); for (int y = y0; y < y1 + 1; y++) { const S wy = wz * kernel( ((S) y - ky) / (width[ndim - 2] / 2.0), param[ndim - 2]); for (int x = x0; x < x1 + 1; x++) { const S w = wy * kernel( ((S) x - kx) / (width[ndim - 1] / 2.0), param[ndim - 1]); for (int b = 0; b < batch_size; b++) { const int input_idx[] = {b, i}; const T v = (T) w * input[input_idx]; const int output_idx[] = { b, mod(z, nz), mod(y, ny), mod(x, nx)}; atomicAdd(reinterpret_cast<T::value_type*>( &(output[output_idx])), v.real()); atomicAdd(reinterpret_cast<T::value_type*>( &(output[output_idx])) + 1, v.imag()); } } } } """, name='gridding3_complex', preamble=kernel + mod_cuda, reduce_dims=False) return _gridding1_cuda_complex, _gridding2_cuda_complex, \ _gridding3_cuda_complex _interpolate_cuda = {} _gridding_cuda = {} _gridding_cuda_complex = {} for kernel in KERNELS: _interpolate_cuda[kernel] = _get_interpolate_cuda(kernel) _gridding_cuda[kernel] = _get_gridding_cuda(kernel) _gridding_cuda_complex[kernel] = _get_gridding_cuda_complex(kernel)