Source code for sigpy.block

# -*- coding: utf-8 -*-
"""Block reshape functions.

"""
import numba as nb
import numpy as np

from sigpy import backend, config, util

__all__ = ["array_to_blocks", "blocks_to_array"]


[docs]def array_to_blocks(input, blk_shape, blk_strides): """Extract blocks from an array in a sliding window manner. Args: input (array): input array of shape [..., N_1, ..., N_ndim] blk_shape (tuple): block shape of length ndim, with ndim={1, 2, 3}. blk_strides (tuple): block strides of length ndim. Returns: array: array of shape [...] + num_blks + blk_shape, where num_blks = (N - blk_shape + blk_strides) // blk_strides. Example: >>> input = np.array([0, 1, 2, 3, 4, 5]) >>> print(array_to_blocks(input, [2], [2])) [[0, 1], [2, 3], [4, 5]] """ if len(blk_shape) != len(blk_strides): raise ValueError("blk_shape must have the same length as blk_strides.") ndim = len(blk_shape) num_blks = [ (i - b + s) // s for i, b, s in zip(input.shape[-ndim:], blk_shape, blk_strides) ] batch_shape = list(input.shape[:-ndim]) batch_size = util.prod(batch_shape) xp = backend.get_array_module(input) output = xp.zeros( [batch_size] + num_blks + list(blk_shape), dtype=input.dtype ) input = input.reshape([batch_size] + list(input.shape[-ndim:])) if ndim == 1: if xp == np: _array_to_blocks1( output, input, batch_size, blk_shape[-1], blk_strides[-1], num_blks[-1], ) else: # pragma: no cover _array_to_blocks1_cuda( input, batch_size, blk_shape[-1], blk_strides[-1], num_blks[-1], output, size=output.size, ) elif ndim == 2: if xp == np: _array_to_blocks2( output, input, batch_size, blk_shape[-1], blk_shape[-2], blk_strides[-1], blk_strides[-2], num_blks[-1], num_blks[-2], ) else: # pragma: no cover _array_to_blocks2_cuda( input, batch_size, blk_shape[-1], blk_shape[-2], blk_strides[-1], blk_strides[-2], num_blks[-1], num_blks[-2], output, size=output.size, ) elif ndim == 3: if xp == np: _array_to_blocks3( output, input, batch_size, blk_shape[-1], blk_shape[-2], blk_shape[-3], blk_strides[-1], blk_strides[-2], blk_strides[-3], num_blks[-1], num_blks[-2], num_blks[-3], ) else: # pragma: no cover _array_to_blocks3_cuda( input, batch_size, blk_shape[-1], blk_shape[-2], blk_shape[-3], blk_strides[-1], blk_strides[-2], blk_strides[-3], num_blks[-1], num_blks[-2], num_blks[-3], output, size=output.size, ) else: raise ValueError("Only support ndim=1, 2, or 3, got {}".format(ndim)) return output.reshape(batch_shape + num_blks + list(blk_shape))
[docs]def blocks_to_array(input, oshape, blk_shape, blk_strides): """Accumulate blocks into an array in a sliding window manner. Args: input (array): input array of shape [...] + num_blks + blk_shape oshape (tuple): output shape. blk_shape (tuple): block shape of length ndim. blk_strides (tuple): block strides of length ndim. Returns: array: array of shape oshape. """ if len(blk_shape) != len(blk_strides): raise ValueError("blk_shape must have the same length as blk_strides.") ndim = len(blk_shape) num_blks = input.shape[-(2 * ndim) : -ndim] batch_shape = list(oshape[:-ndim]) batch_size = util.prod(batch_shape) xp = backend.get_array_module(input) output = xp.zeros([batch_size] + list(oshape[-ndim:]), dtype=input.dtype) input = input.reshape([batch_size] + list(input.shape[-2 * ndim :])) if ndim == 1: if xp == np: _blocks_to_array1( output, input, batch_size, blk_shape[-1], blk_strides[-1], num_blks[-1], ) else: # pragma: no cover _blocks_to_array1_cuda( input, batch_size, blk_shape[-1], blk_strides[-1], num_blks[-1], output, size=output.size, ) elif ndim == 2: if xp == np: _blocks_to_array2( output, input, batch_size, blk_shape[-1], blk_shape[-2], blk_strides[-1], blk_strides[-2], num_blks[-1], num_blks[-2], ) else: # pragma: no cover _blocks_to_array2_cuda( input, batch_size, blk_shape[-1], blk_shape[-2], blk_strides[-1], blk_strides[-2], num_blks[-1], num_blks[-2], output, size=output.size, ) elif ndim == 3: if xp == np: _blocks_to_array3( output, input, batch_size, blk_shape[-1], blk_shape[-2], blk_shape[-3], blk_strides[-1], blk_strides[-2], blk_strides[-3], num_blks[-1], num_blks[-2], num_blks[-3], ) else: # pragma: no cover _blocks_to_array3_cuda( input, batch_size, blk_shape[-1], blk_shape[-2], blk_shape[-3], blk_strides[-1], blk_strides[-2], blk_strides[-3], num_blks[-1], num_blks[-2], num_blks[-3], output, size=output.size, ) else: raise ValueError("Only support ndim=1, 2, or 3, got {}".format(ndim)) return output.reshape(oshape)
@nb.jit(nopython=True, cache=True) # pragma: no cover def _array_to_blocks1(output, input, batch_size, Bx, Sx, Nx): for b in range(batch_size): for nx in range(Nx): for bx in range(Bx): ix = nx * Sx + bx if ix < input.shape[-1]: output[b, nx, bx] = input[b, ix] @nb.jit(nopython=True, cache=True) # pragma: no cover def _array_to_blocks2(output, input, batch_size, Bx, By, Sx, Sy, Nx, Ny): for b in range(batch_size): for ny in range(Ny): for nx in range(Nx): for by in range(By): for bx in range(Bx): iy = ny * Sy + by ix = nx * Sx + bx if ix < input.shape[-1] and iy < input.shape[-2]: output[b, ny, nx, by, bx] = input[b, iy, ix] @nb.jit(nopython=True, cache=True) # pragma: no cover def _array_to_blocks3( output, input, batch_size, Bx, By, Bz, Sx, Sy, Sz, Nx, Ny, Nz ): for b in range(batch_size): for nz in range(Nz): for ny in range(Ny): for nx in range(Nx): for bz in range(Bz): for by in range(By): for bx in range(Bx): iz = nz * Sz + bz iy = ny * Sy + by ix = nx * Sx + bx if ( ix < input.shape[-1] and iy < input.shape[-2] and iz < input.shape[-3] ): output[b, nz, ny, nx, bz, by, bx] = input[ b, iz, iy, ix ] @nb.jit(nopython=True, cache=True) # pragma: no cover def _blocks_to_array1(output, input, batch_size, Bx, Sx, Nx): for b in range(batch_size): for ix in range(output.shape[-1]): rx = ix % Sx for bx in range(rx, Bx, Sx): nx = (ix - bx) // Sx if nx >= 0 and nx < Nx: output[b, ix] += input[b, nx, bx] @nb.jit(nopython=True, cache=True) # pragma: no cover def _blocks_to_array2(output, input, batch_size, Bx, By, Sx, Sy, Nx, Ny): for b in range(batch_size): for iy in range(output.shape[-2]): ry = iy % Sy for ix in range(output.shape[-1]): rx = ix % Sx for by in range(ry, By, Sy): ny = (iy - by) // Sy if ny >= 0 and ny < Ny: for bx in range(rx, Bx, Sx): nx = (ix - bx) // Sx if nx >= 0 and nx < Nx: output[b, iy, ix] += input[b, ny, nx, by, bx] @nb.jit(nopython=True, cache=True) # pragma: no cover def _blocks_to_array3( output, input, batch_size, Bx, By, Bz, Sx, Sy, Sz, Nx, Ny, Nz ): for b in range(batch_size): for iz in range(output.shape[-3]): for iy in range(output.shape[-2]): for ix in range(output.shape[-1]): rz = iz % Sz ry = iy % Sy rx = ix % Sx for bz in range(rz, Bz, Sz): for by in range(ry, By, Sy): for bx in range(rx, Bx, Sx): nz = (iz - bz) // Sz ny = (iy - by) // Sy nx = (ix - bx) // Sx if ( nx >= 0 and nx < Nx and ny >= 0 and ny < Ny and nz >= 0 and nz < Nz ): output[b, iz, iy, ix] += input[ b, nz, ny, nx, bz, by, bx ] if config.cupy_enabled: # pragma: no cover import cupy as cp _array_to_blocks1_cuda = cp.ElementwiseKernel( "raw T input, int32 batch_size, int32 Bx, int32 Sx, int32 Nx", "raw T output", """ const int ndim = input.ndim; int b = i / Bx / Nx; i -= b * Bx * Nx; int nx = i / Bx; i -= nx * Bx; int bx = i; int ix = nx * Sx + bx; if (ix < input.shape()[ndim - 1]) { int input_idx[] = {b, ix}; int output_idx[] = {b, nx, bx}; output[output_idx] = input[input_idx]; } """, name="_array_to_blocks1_cuda", ) _array_to_blocks2_cuda = cp.ElementwiseKernel( "raw T input, int32 batch_size, int32 Bx, int32 By, " "int32 Sx, int32 Sy, int32 Nx, int32 Ny", "raw T output", """ const int ndim = input.ndim; int b = i / Bx / By / Nx / Ny; i -= b * Bx * By * Nx * Ny; int ny = i / Bx / By / Nx; i -= ny * Bx * By * Nx; int nx = i / Bx / By; i -= nx * Bx * By; int by = i / Bx; i -= by * Bx; int bx = i; int iy = ny * Sy + by; int ix = nx * Sx + bx; if (ix < input.shape()[ndim - 1] && iy < input.shape()[ndim - 2]) { int input_idx[] = {b, iy, ix}; int output_idx[] = {b, ny, nx, by, bx}; output[output_idx] = input[input_idx]; } """, name="_array_to_blocks2_cuda", ) _array_to_blocks3_cuda = cp.ElementwiseKernel( "raw T input, int32 batch_size, int32 Bx, int32 By, int32 Bz, " "int32 Sx, int32 Sy, int32 Sz, int32 Nx, int32 Ny, int32 Nz", "raw T output", """ const int ndim = input.ndim; int b = i / Bx / By / Bz / Nx / Ny / Nz; i -= b * Bx * By * Bz * Nx * Ny * Nz; int nz = i / Bx / By / Bz / Nx / Ny; i -= nz * Bx * By * Bz * Nx * Ny; int ny = i / Bx / By / Bz / Nx; i -= ny * Bx * By * Bz * Nx; int nx = i / Bx / By / Bz; i -= nx * Bx * By * Bz; int bz = i / Bx / By; i -= bz * Bx * By; int by = i / Bx; i -= by * Bx; int bx = i; int iz = nz * Sz + bz; int iy = ny * Sy + by; int ix = nx * Sx + bx; if (ix < input.shape()[ndim - 1] && iy < input.shape()[ndim - 2] && iz < input.shape()[ndim - 3]) { int input_idx[] = {b, iz, iy, ix}; int output_idx[] = {b, nz, ny, nx, bz, by, bx}; output[output_idx] = input[input_idx]; } """, name="_array_to_blocks3_cuda", ) _blocks_to_array1_cuda = cp.ElementwiseKernel( "raw T input, int32 batch_size, int32 Bx, int32 Sx, int32 Nx", "raw T output", """ const int ndim = output.ndim; int Ix = output.shape()[ndim - 1]; int b = i / Ix; i -= b * Ix; int ix = i; int rx = ix % Sx; for (int bx = rx; bx < Bx; bx += Sx){ int nx = (ix - bx) / Sx; if ((nx >= 0) && (nx < Nx)) { int input_idx[] = {b, nx, bx}; int output_idx[] = {b, ix}; output[output_idx] += input[input_idx]; } } """, name="_blocks_to_array1_cuda", ) _blocks_to_array2_cuda = cp.ElementwiseKernel( "raw T input, int32 batch_size, " "int32 Bx, int32 By, int32 Sx, int32 Sy, " "int32 Nx, int32 Ny", "raw T output", """ const int ndim = output.ndim; int Ix = output.shape()[ndim - 1]; int Iy = output.shape()[ndim - 2]; int b = i / Ix / Iy; i -= b * Ix * Iy; int iy = i / Ix; i -= iy * Ix; int ix = i; int ry = iy % Sy; int rx = ix % Sx; for (int by = ry; by < By; by += Sy){ int ny = (iy - by) / Sy; if ((ny >= 0) && (ny < Ny)) { for (int bx = rx; bx < Bx; bx += Sx){ int nx = (ix - bx) / Sx; if ((nx >= 0) && (nx < Nx)) { int input_idx[] = {b, ny, nx, by, bx}; int output_idx[] = {b, iy, ix}; output[output_idx] += input[input_idx]; } } } } """, name="_blocks_to_array2_cuda", ) _blocks_to_array3_cuda = cp.ElementwiseKernel( "raw T input, int32 batch_size, int32 Bx, int32 By, int32 Bz, " "int32 Sx, int32 Sy, int32 Sz, int32 Nx, int32 Ny, int32 Nz", "raw T output", """ const int ndim = output.ndim; int Ix = output.shape()[ndim - 1]; int Iy = output.shape()[ndim - 2]; int Iz = output.shape()[ndim - 3]; int b = i / Ix / Iy / Iz; i -= b * Ix * Iy * Iz; int iz = i / Ix / Iy; i -= iz * Ix * Iy; int iy = i / Ix; i -= iy * Ix; int ix = i; int rz = iz % Sz; int ry = iy % Sy; int rx = ix % Sx; for (int bz = rz; bz < Bz; bz += Sz){ int nz = (iz - bz) / Sz; if ((nz >= 0) && (nz < Nz)) { for (int by = ry; by < By; by += Sy){ int ny = (iy - by) / Sy; if ((ny >= 0) && (ny < Ny)) { for (int bx = rx; bx < Bx; bx += Sx){ int nx = (ix - bx) / Sx; if ((nx >= 0) && (nx < Nx)) { int input_idx[] = {b, nz, ny, nx, bz, by, bx}; int output_idx[] = {b, iz, iy, ix}; output[output_idx] += input[input_idx]; } } } } } } """, name="_blocks_to_array3_cuda", )