Source code for sigpy.block

# -*- coding: utf-8 -*-
"""Block reshape functions.

"""
import numpy as np
import numba as nb

from sigpy import backend, config, util


__all__ = ['array_to_blocks', 'blocks_to_array']


def array_to_blocks(input, blk_shape, blk_strides):
    """Extract blocks from an array in a sliding window manner.

    Args:
        input (array): input array of shape [..., N_1, ..., N_ndim]
        blk_shape (tuple): block shape of length ndim, with ndim={1, 2, 3}.
        blk_strides (tuple): block strides of length ndim.

    Returns:
        array: array of shape [...] + num_blks + blk_shape, where
            num_blks = (N - blk_shape + blk_strides) // blk_strides.

    Example:

        >>> input = np.array([0, 1, 2, 3, 4, 5])
        >>> print(array_to_blocks(input, [2], [2]))
        [[0, 1],
         [2, 3],
         [4, 5]]

    """
    if len(blk_shape) != len(blk_strides):
        raise ValueError('blk_shape must have the same'
                         ' length as blk_strides.')

    ndim = len(blk_shape)
    num_blks = [(i - b + s) // s for i, b, s in zip(
        input.shape[-ndim:], blk_shape, blk_strides)]
    batch_shape = list(input.shape[:-ndim])
    batch_size = util.prod(batch_shape)

    xp = backend.get_array_module(input)
    output = xp.zeros([batch_size] + num_blks + list(blk_shape),
                      dtype=input.dtype)
    input = input.reshape([batch_size] + list(input.shape[-ndim:]))

    if ndim == 1:
        if xp == np:
            _array_to_blocks1(output, input,
                              batch_size,
                              blk_shape[-1],
                              blk_strides[-1],
                              num_blks[-1])
        else:  # pragma: no cover
            _array_to_blocks1_cuda(input,
                                   batch_size,
                                   blk_shape[-1],
                                   blk_strides[-1],
                                   num_blks[-1],
                                   output,
                                   size=output.size)
    elif ndim == 2:
        if xp == np:
            _array_to_blocks2(output, input,
                              batch_size,
                              blk_shape[-1], blk_shape[-2],
                              blk_strides[-1], blk_strides[-2],
                              num_blks[-1], num_blks[-2])
        else:  # pragma: no cover
            _array_to_blocks2_cuda(input,
                                   batch_size,
                                   blk_shape[-1], blk_shape[-2],
                                   blk_strides[-1], blk_strides[-2],
                                   num_blks[-1], num_blks[-2],
                                   output,
                                   size=output.size)
    elif ndim == 3:
        if xp == np:
            _array_to_blocks3(output, input,
                              batch_size,
                              blk_shape[-1], blk_shape[-2], blk_shape[-3],
                              blk_strides[-1], blk_strides[-2],
                              blk_strides[-3],
                              num_blks[-1], num_blks[-2], num_blks[-3])
        else:  # pragma: no cover
            _array_to_blocks3_cuda(input,
                                   batch_size,
                                   blk_shape[-1], blk_shape[-2],
                                   blk_shape[-3],
                                   blk_strides[-1], blk_strides[-2],
                                   blk_strides[-3],
                                   num_blks[-1], num_blks[-2], num_blks[-3],
                                   output,
                                   size=output.size)
    else:
        raise ValueError('Only support ndim=1, 2, or 3, got {}'.format(ndim))

    return output.reshape(batch_shape + num_blks + list(blk_shape))

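# Usage sketch (illustrative note, not part of the module): when the stride is
# smaller than the block size, array_to_blocks returns overlapping windows.
# For a length-4 NumPy input with blk_shape=[2] and blk_strides=[1],
# num_blks = (4 - 2 + 1) // 1 = 3. Values below assume a plain CPU array.
#
#     >>> x = np.array([0., 1., 2., 3.])
#     >>> array_to_blocks(x, [2], [1])
#     array([[0., 1.],
#            [1., 2.],
#            [2., 3.]])
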
def blocks_to_array(input, oshape, blk_shape, blk_strides):
    """Accumulate blocks into an array in a sliding window manner.

    Args:
        input (array): input array of shape [...] + num_blks + blk_shape
        oshape (tuple): output shape.
        blk_shape (tuple): block shape of length ndim.
        blk_strides (tuple): block strides of length ndim.

    Returns:
        array: array of shape oshape.

    """
    if len(blk_shape) != len(blk_strides):
        raise ValueError('blk_shape must have the same'
                         ' length as blk_strides.')

    ndim = len(blk_shape)
    num_blks = input.shape[-(2 * ndim):-ndim]
    batch_shape = list(oshape[:-ndim])
    batch_size = util.prod(batch_shape)

    xp = backend.get_array_module(input)
    output = xp.zeros([batch_size] + list(oshape[-ndim:]), dtype=input.dtype)
    input = input.reshape([batch_size] + list(input.shape[-2 * ndim:]))

    if ndim == 1:
        if xp == np:
            _blocks_to_array1(output, input,
                              batch_size,
                              blk_shape[-1],
                              blk_strides[-1],
                              num_blks[-1])
        else:  # pragma: no cover
            _blocks_to_array1_cuda(input,
                                   batch_size,
                                   blk_shape[-1],
                                   blk_strides[-1],
                                   num_blks[-1],
                                   output,
                                   size=output.size)
    elif ndim == 2:
        if xp == np:
            _blocks_to_array2(output, input,
                              batch_size,
                              blk_shape[-1], blk_shape[-2],
                              blk_strides[-1], blk_strides[-2],
                              num_blks[-1], num_blks[-2])
        else:  # pragma: no cover
            _blocks_to_array2_cuda(input,
                                   batch_size,
                                   blk_shape[-1], blk_shape[-2],
                                   blk_strides[-1], blk_strides[-2],
                                   num_blks[-1], num_blks[-2],
                                   output,
                                   size=output.size)
    elif ndim == 3:
        if xp == np:
            _blocks_to_array3(output, input,
                              batch_size,
                              blk_shape[-1], blk_shape[-2], blk_shape[-3],
                              blk_strides[-1], blk_strides[-2],
                              blk_strides[-3],
                              num_blks[-1], num_blks[-2], num_blks[-3])
        else:  # pragma: no cover
            _blocks_to_array3_cuda(
                input,
                batch_size,
                blk_shape[-1], blk_shape[-2], blk_shape[-3],
                blk_strides[-1], blk_strides[-2], blk_strides[-3],
                num_blks[-1], num_blks[-2], num_blks[-3],
                output,
                size=output.size)
    else:
        raise ValueError('Only support ndim=1, 2, or 3, got {}'.format(ndim))

    return output.reshape(oshape)

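# Usage sketch (illustrative note, not part of the module): blocks_to_array is
# the adjoint of array_to_blocks, so overlapping samples are summed rather
# than averaged. Continuing the NumPy example above, the interior samples 1
# and 2 each appear in two blocks and are therefore counted twice.
#
#     >>> blks = array_to_blocks(x, [2], [1])
#     >>> blocks_to_array(blks, x.shape, [2], [1])
#     array([0., 2., 4., 3.])
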
# CPU implementations: Numba-compiled loops that gather array entries into
# blocks (_array_to_blocks*) or scatter-add block entries back into the array
# (_blocks_to_array*).
@nb.jit(nopython=True, cache=True)  # pragma: no cover
def _array_to_blocks1(output, input, batch_size, Bx, Sx, Nx):
    for b in range(batch_size):
        for nx in range(Nx):
            for bx in range(Bx):
                ix = nx * Sx + bx
                if ix < input.shape[-1]:
                    output[b, nx, bx] = input[b, ix]


@nb.jit(nopython=True, cache=True)  # pragma: no cover
def _array_to_blocks2(output, input, batch_size, Bx, By, Sx, Sy, Nx, Ny):
    for b in range(batch_size):
        for ny in range(Ny):
            for nx in range(Nx):
                for by in range(By):
                    for bx in range(Bx):
                        iy = ny * Sy + by
                        ix = nx * Sx + bx
                        if ix < input.shape[-1] and iy < input.shape[-2]:
                            output[b, ny, nx, by, bx] = input[b, iy, ix]


@nb.jit(nopython=True, cache=True)  # pragma: no cover
def _array_to_blocks3(output, input, batch_size,
                      Bx, By, Bz, Sx, Sy, Sz, Nx, Ny, Nz):
    for b in range(batch_size):
        for nz in range(Nz):
            for ny in range(Ny):
                for nx in range(Nx):
                    for bz in range(Bz):
                        for by in range(By):
                            for bx in range(Bx):
                                iz = nz * Sz + bz
                                iy = ny * Sy + by
                                ix = nx * Sx + bx
                                if (ix < input.shape[-1]
                                        and iy < input.shape[-2]
                                        and iz < input.shape[-3]):
                                    output[b, nz, ny, nx, bz, by, bx] = \
                                        input[b, iz, iy, ix]


@nb.jit(nopython=True, cache=True)  # pragma: no cover
def _blocks_to_array1(output, input, batch_size, Bx, Sx, Nx):
    for b in range(batch_size):
        for ix in range(output.shape[-1]):
            rx = ix % Sx
            for bx in range(rx, Bx, Sx):
                nx = (ix - bx) // Sx
                if nx >= 0 and nx < Nx:
                    output[b, ix] += input[b, nx, bx]


@nb.jit(nopython=True, cache=True)  # pragma: no cover
def _blocks_to_array2(output, input, batch_size, Bx, By, Sx, Sy, Nx, Ny):
    for b in range(batch_size):
        for iy in range(output.shape[-2]):
            ry = iy % Sy
            for ix in range(output.shape[-1]):
                rx = ix % Sx
                for by in range(ry, By, Sy):
                    ny = (iy - by) // Sy
                    if ny >= 0 and ny < Ny:
                        for bx in range(rx, Bx, Sx):
                            nx = (ix - bx) // Sx
                            if nx >= 0 and nx < Nx:
                                output[b, iy, ix] += \
                                    input[b, ny, nx, by, bx]


@nb.jit(nopython=True, cache=True)  # pragma: no cover
def _blocks_to_array3(output, input, batch_size,
                      Bx, By, Bz, Sx, Sy, Sz, Nx, Ny, Nz):
    for b in range(batch_size):
        for iz in range(output.shape[-3]):
            for iy in range(output.shape[-2]):
                for ix in range(output.shape[-1]):
                    rz = iz % Sz
                    ry = iy % Sy
                    rx = ix % Sx
                    for bz in range(rz, Bz, Sz):
                        for by in range(ry, By, Sy):
                            for bx in range(rx, Bx, Sx):
                                nz = (iz - bz) // Sz
                                ny = (iy - by) // Sy
                                nx = (ix - bx) // Sx
                                if nx >= 0 and nx < Nx \
                                        and ny >= 0 and ny < Ny \
                                        and nz >= 0 and nz < Nz:
                                    output[b, iz, iy, ix] += \
                                        input[b, nz, ny, nx, bz, by, bx]


# GPU implementations: CuPy ElementwiseKernels that apply the same indexing as
# the Numba loops above. They are only compiled when CuPy is available.
if config.cupy_enabled:  # pragma: no cover
    import cupy as cp

    _array_to_blocks1_cuda = cp.ElementwiseKernel(
        'raw T input, int32 batch_size, int32 Bx, int32 Sx, int32 Nx',
        'raw T output',
        """
        const int ndim = input.ndim;
        int b = i / Bx / Nx;
        i -= b * Bx * Nx;
        int nx = i / Bx;
        i -= nx * Bx;
        int bx = i;

        int ix = nx * Sx + bx;
        if (ix < input.shape()[ndim - 1]) {
            int input_idx[] = {b, ix};
            int output_idx[] = {b, nx, bx};
            output[output_idx] = input[input_idx];
        }
        """,
        name='_array_to_blocks1_cuda')

    _array_to_blocks2_cuda = cp.ElementwiseKernel(
        'raw T input, int32 batch_size, int32 Bx, int32 By, '
        'int32 Sx, int32 Sy, int32 Nx, int32 Ny',
        'raw T output',
        """
        const int ndim = input.ndim;
        int b = i / Bx / By / Nx / Ny;
        i -= b * Bx * By * Nx * Ny;
        int ny = i / Bx / By / Nx;
        i -= ny * Bx * By * Nx;
        int nx = i / Bx / By;
        i -= nx * Bx * By;
        int by = i / Bx;
        i -= by * Bx;
        int bx = i;

        int iy = ny * Sy + by;
        int ix = nx * Sx + bx;
        if (ix < input.shape()[ndim - 1] && iy < input.shape()[ndim - 2]) {
            int input_idx[] = {b, iy, ix};
            int output_idx[] = {b, ny, nx, by, bx};
            output[output_idx] = input[input_idx];
        }
        """,
        name='_array_to_blocks2_cuda')

    _array_to_blocks3_cuda = cp.ElementwiseKernel(
        'raw T input, int32 batch_size, int32 Bx, int32 By, int32 Bz, '
        'int32 Sx, int32 Sy, int32 Sz, int32 Nx, int32 Ny, int32 Nz',
        'raw T output',
        """
        const int ndim = input.ndim;
        int b = i / Bx / By / Bz / Nx / Ny / Nz;
        i -= b * Bx * By * Bz * Nx * Ny * Nz;
        int nz = i / Bx / By / Bz / Nx / Ny;
        i -= nz * Bx * By * Bz * Nx * Ny;
        int ny = i / Bx / By / Bz / Nx;
        i -= ny * Bx * By * Bz * Nx;
        int nx = i / Bx / By / Bz;
        i -= nx * Bx * By * Bz;
        int bz = i / Bx / By;
        i -= bz * Bx * By;
        int by = i / Bx;
        i -= by * Bx;
        int bx = i;

        int iz = nz * Sz + bz;
        int iy = ny * Sy + by;
        int ix = nx * Sx + bx;
        if (ix < input.shape()[ndim - 1]
            && iy < input.shape()[ndim - 2]
            && iz < input.shape()[ndim - 3]) {
            int input_idx[] = {b, iz, iy, ix};
            int output_idx[] = {b, nz, ny, nx, bz, by, bx};
            output[output_idx] = input[input_idx];
        }
        """,
        name='_array_to_blocks3_cuda')

    _blocks_to_array1_cuda = cp.ElementwiseKernel(
        'raw T input, int32 batch_size, int32 Bx, int32 Sx, int32 Nx',
        'raw T output',
        """
        const int ndim = output.ndim;
        int Ix = output.shape()[ndim - 1];
        int b = i / Ix;
        i -= b * Ix;
        int ix = i;

        int rx = ix % Sx;
        for (int bx = rx; bx < Bx; bx += Sx) {
            int nx = (ix - bx) / Sx;
            if ((nx >= 0) && (nx < Nx)) {
                int input_idx[] = {b, nx, bx};
                int output_idx[] = {b, ix};
                output[output_idx] += input[input_idx];
            }
        }
        """,
        name='_blocks_to_array1_cuda')

    _blocks_to_array2_cuda = cp.ElementwiseKernel(
        'raw T input, int32 batch_size, '
        'int32 Bx, int32 By, int32 Sx, int32 Sy, '
        'int32 Nx, int32 Ny',
        'raw T output',
        """
        const int ndim = output.ndim;
        int Ix = output.shape()[ndim - 1];
        int Iy = output.shape()[ndim - 2];
        int b = i / Ix / Iy;
        i -= b * Ix * Iy;
        int iy = i / Ix;
        i -= iy * Ix;
        int ix = i;

        int ry = iy % Sy;
        int rx = ix % Sx;
        for (int by = ry; by < By; by += Sy) {
            int ny = (iy - by) / Sy;
            if ((ny >= 0) && (ny < Ny)) {
                for (int bx = rx; bx < Bx; bx += Sx) {
                    int nx = (ix - bx) / Sx;
                    if ((nx >= 0) && (nx < Nx)) {
                        int input_idx[] = {b, ny, nx, by, bx};
                        int output_idx[] = {b, iy, ix};
                        output[output_idx] += input[input_idx];
                    }
                }
            }
        }
        """,
        name='_blocks_to_array2_cuda')

    _blocks_to_array3_cuda = cp.ElementwiseKernel(
        'raw T input, int32 batch_size, int32 Bx, int32 By, int32 Bz, '
        'int32 Sx, int32 Sy, int32 Sz, int32 Nx, int32 Ny, int32 Nz',
        'raw T output',
        """
        const int ndim = output.ndim;
        int Ix = output.shape()[ndim - 1];
        int Iy = output.shape()[ndim - 2];
        int Iz = output.shape()[ndim - 3];
        int b = i / Ix / Iy / Iz;
        i -= b * Ix * Iy * Iz;
        int iz = i / Ix / Iy;
        i -= iz * Ix * Iy;
        int iy = i / Ix;
        i -= iy * Ix;
        int ix = i;

        int rz = iz % Sz;
        int ry = iy % Sy;
        int rx = ix % Sx;
        for (int bz = rz; bz < Bz; bz += Sz) {
            int nz = (iz - bz) / Sz;
            if ((nz >= 0) && (nz < Nz)) {
                for (int by = ry; by < By; by += Sy) {
                    int ny = (iy - by) / Sy;
                    if ((ny >= 0) && (ny < Ny)) {
                        for (int bx = rx; bx < Bx; bx += Sx) {
                            int nx = (ix - bx) / Sx;
                            if ((nx >= 0) && (nx < Nx)) {
                                int input_idx[] = {b, nz, ny, nx, bz, by, bx};
                                int output_idx[] = {b, iz, iy, ix};
                                output[output_idx] += input[input_idx];
                            }
                        }
                    }
                }
            }
        }
        """,
        name='_blocks_to_array3_cuda')
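
# Index bookkeeping used by the _blocks_to_array* kernels above (illustrative
# note, not executed): each output sample ix receives contributions from block
# offsets bx in {ix % Sx, ix % Sx + Sx, ...} with block index
# nx = (ix - bx) // Sx, which inverts the gather relation ix = nx * Sx + bx.
# For example, with Bx = 3 and Sx = 2, output index ix = 4 is touched by
# (nx, bx) = (2, 0) and (1, 2), and indeed 2 * 2 + 0 == 1 * 2 + 2 == 4.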