Source code for mxnet.ndarray.sparse

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# coding: utf-8
# pylint: disable=wildcard-import, unused-wildcard-import, too-many-lines
"""Sparse NDArray API of MXNet."""

from __future__ import absolute_import
from __future__ import division
try:
    from __builtin__ import slice as py_slice
    from __builtin__ import sum as py_sum
except ImportError:
    from builtins import slice as py_slice
    from builtins import sum as py_sum

import ctypes
import warnings
from array import array as native_array

__all__ = ["_ndarray_cls", "csr_matrix", "row_sparse_array",
           "BaseSparseNDArray", "CSRNDArray", "RowSparseNDArray"]

import numpy as np
from ..base import NotSupportedForSparseNDArray
from ..base import _LIB, numeric_types
from ..base import c_array_buf, mx_real_t, integer_types
from ..base import mx_uint, NDArrayHandle, check_call
from ..context import Context
from . import _internal
from . import op
try:
    from .gen_sparse import * # pylint: disable=redefined-builtin
except ImportError:
    pass
from ._internal import _set_ndarray_class
from .ndarray import NDArray, _storage_type, _DTYPE_NP_TO_MX, _DTYPE_MX_TO_NP
from .ndarray import _STORAGE_TYPE_STR_TO_ID, _STORAGE_TYPE_ROW_SPARSE, _STORAGE_TYPE_CSR
from .ndarray import _STORAGE_TYPE_UNDEFINED, _STORAGE_TYPE_DEFAULT
from .ndarray import zeros as _zeros_ndarray
from .ndarray import array as _array


try:
    import scipy.sparse as spsp
except ImportError:
    spsp = None

_STORAGE_AUX_TYPES = {
    'row_sparse': [np.int64],
    'csr': [np.int64, np.int64]
}


def _new_alloc_handle(stype, shape, ctx, delay_alloc, dtype, aux_types, aux_shapes=None):
    """Return a new handle with specified storage type, shape, dtype and context.

    Empty handle is only used to hold results

    Returns
    -------
    handle
        A new empty ndarray handle
    """
    hdl = NDArrayHandle()
    for aux_t in aux_types:
        if np.dtype(aux_t) != np.dtype("int64"):
            raise NotImplementedError("only int64 is supported for aux types")
    aux_type_ids = [int(_DTYPE_NP_TO_MX[np.dtype(aux_t).type]) for aux_t in aux_types]
    aux_shapes = [(0,) for aux_t in aux_types] if aux_shapes is None else aux_shapes
    aux_shape_lens = [len(aux_shape) for aux_shape in aux_shapes]
    aux_shapes = py_sum(aux_shapes, ())
    num_aux = mx_uint(len(aux_types))
    check_call(_LIB.MXNDArrayCreateSparseEx(
        ctypes.c_int(int(_STORAGE_TYPE_STR_TO_ID[stype])),
        c_array_buf(mx_uint, native_array('I', shape)),
        mx_uint(len(shape)),
        ctypes.c_int(ctx.device_typeid),
        ctypes.c_int(ctx.device_id),
        ctypes.c_int(int(delay_alloc)),
        ctypes.c_int(int(_DTYPE_NP_TO_MX[np.dtype(dtype).type])),
        num_aux,
        c_array_buf(ctypes.c_int, native_array('i', aux_type_ids)),
        c_array_buf(mx_uint, native_array('I', aux_shape_lens)),
        c_array_buf(mx_uint, native_array('I', aux_shapes)),
        ctypes.byref(hdl)))
    return hdl


class BaseSparseNDArray(NDArray):
    """The base class of an NDArray stored in a sparse storage format.

    See CSRNDArray and RowSparseNDArray for more details.
    """

    def __repr__(self):
        """Returns a string representation of the sparse array."""
        shape_info = 'x'.join(['%d' % x for x in self.shape])
        # The data content is not displayed since the array usually has big shape
        return '\n<%s %s @%s>' % (self.__class__.__name__,
                                  shape_info, self.context)

    def __iadd__(self, other):
        raise NotImplementedError()

    def __isub__(self, other):
        raise NotImplementedError()

    def __imul__(self, other):
        raise NotImplementedError()

    def __idiv__(self, other):
        raise NotImplementedError()

    def __itruediv__(self, other):
        raise NotImplementedError()

    def _sync_copyfrom(self, source_array):
        raise NotImplementedError()

    def _at(self, idx):
        raise NotSupportedForSparseNDArray(self._at, '[idx]', idx)

    def _slice(self, start, stop):
        raise NotSupportedForSparseNDArray(self._slice, None, start, stop)

    def reshape(self, shape):
        raise NotSupportedForSparseNDArray(self.reshape, None, shape)

    @property
    def size(self):
        # the `size` for a sparse ndarray is ambiguous, hence disabled.
        raise NotImplementedError()

    def _aux_type(self, i):
        """Data-type of the array's ith aux data.

        Returns
        -------
        numpy.dtype
            This BaseSparseNDArray's aux data type.
        """
        aux_type = ctypes.c_int()
        check_call(_LIB.MXNDArrayGetAuxType(self.handle, i, ctypes.byref(aux_type)))
        return _DTYPE_MX_TO_NP[aux_type.value]

    @property
    def _num_aux(self):
        """The number of aux data used to help store the sparse ndarray.
        """
        return len(_STORAGE_AUX_TYPES[self.stype])

    @property
    def _aux_types(self):
        """The data types of the aux data for the BaseSparseNDArray.
        """
        aux_types = []
        num_aux = self._num_aux
        for i in range(num_aux):
            aux_types.append(self._aux_type(i))
        return aux_types

    def asnumpy(self):
        """Return a dense ``numpy.ndarray`` object with value copied from this array
        """
        return self.tostype('default').asnumpy()

    def astype(self, dtype):
        """Returns a copy of the array after casting to a specified type.
        Parameters
        ----------
        dtype : numpy.dtype or str
            The type of the returned array.
        Examples
        --------
        >>> x = mx.nd.sparse.zeros('row_sparse', (2,3), dtype='float32')
        >>> y = x.astype('int32')
        >>> y.dtype
        
        """
        res = zeros(shape=self.shape, ctx=self.context,
                    dtype=dtype, stype=self.stype)
        self.copyto(res)
        return res

    def copyto(self, other):
        """Copies the value of this array to another array.

        Parameters
        ----------
        other : NDArray or CSRNDArray or RowSparseNDArray or Context
            The destination array or context.

        Returns
        -------
        NDArray or CSRNDArray or RowSparseNDArray
            The copied array.
        """
        if isinstance(other, NDArray):
            if other.handle is self.handle:
                warnings.warn('You are attempting to copy an array to itself', RuntimeWarning)
                return
            return _internal._copyto(self, out=other)
        elif isinstance(other, Context):
            hret = _ndarray_cls(_new_alloc_handle(self.stype, self.shape, other,
                                                  True, self.dtype, self._aux_types))
            return _internal._copyto(self, out=hret)
        else:
            raise TypeError('copyto does not support type ' + str(type(other)))

    def check_format(self, full_check=True):
        """Check whether the NDArray format is valid.

        Parameters
        ----------
        full_check : bool, optional
            If `True`, rigorous check, O(N) operations. Otherwise
            basic check, O(1) operations (default True).
        """
        check_call(_LIB.MXNDArraySyncCheckFormat(self.handle, ctypes.c_bool(full_check)))

    def _data(self):
        """A deep copy NDArray of the data array associated with the BaseSparseNDArray.

        This function blocks. Do not use it in performance critical code.
        """
        self.wait_to_read()
        hdl = NDArrayHandle()
        check_call(_LIB.MXNDArrayGetDataNDArray(self.handle, ctypes.byref(hdl)))
        return NDArray(hdl)


    def _aux_data(self, i):
        """ Get a deep copy NDArray of the i-th aux data array associated with the
        BaseSparseNDArray.

        This function blocks. Do not use it in performance critical code.
        """
        self.wait_to_read()
        hdl = NDArrayHandle()
        check_call(_LIB.MXNDArrayGetAuxNDArray(self.handle, i, ctypes.byref(hdl)))
        return NDArray(hdl)


# pylint: disable=abstract-method
[docs]class CSRNDArray(BaseSparseNDArray): """A sparse representation of 2D NDArray in the Compressed Sparse Row format. A CSRNDArray represents an NDArray as three separate arrays: `data`, `indptr` and `indices`. It uses the CSR representation where the column indices for row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``. The column indices for a given row are expected to be sorted in ascending order. Duplicate column entries for the same row are not allowed. Example ------- >>> a = mx.nd.array([[0, 1, 0], [2, 0, 0], [0, 0, 0], [0, 0, 3]]) >>> a = a.tostype('csr') >>> a.data.asnumpy() array([ 1., 2., 3.], dtype=float32) >>> a.indices.asnumpy() array([1, 0, 2]) >>> a.indptr.asnumpy() array([0, 1, 2, 2, 3]) See Also -------- csr_matrix: Several ways to construct a CSRNDArray """ def __reduce__(self): return CSRNDArray, (None,), super(CSRNDArray, self).__getstate__() def __iadd__(self, other): (self + other).copyto(self) return self def __isub__(self, other): (self - other).copyto(self) return self def __imul__(self, other): (self * other).copyto(self) return self def __idiv__(self, other): (self / other).copyto(self) return self def __itruediv__(self, other): (self / other).copyto(self) return self
[docs] def __getitem__(self, key): """x.__getitem__(i) <=> x[i] Returns a sliced view of this array. Parameters ---------- key : int or slice Indexing key. Examples -------- >>> indptr = np.array([0, 2, 3, 6]) >>> indices = np.array([0, 2, 2, 0, 1, 2]) >>> data = np.array([1, 2, 3, 4, 5, 6]) >>> a = mx.nd.sparse.csr_matrix((data, indices, indptr), shape=(3, 3)) >>> a.asnumpy() array([[ 1., 0., 2.], [ 0., 0., 3.], [ 4., 5., 6.]], dtype=float32) >>> a[1:2].asnumpy() array([[ 0., 0., 3.]], dtype=float32) >>> a[1].asnumpy() array([[ 0., 0., 3.]], dtype=float32) >>> a[-1].asnumpy() array([[ 4., 5., 6.]], dtype=float32) """ if isinstance(key, int): if key == -1: begin = self.shape[0] - 1 else: begin = key return op.slice(self, begin=begin, end=begin+1) if isinstance(key, py_slice): if key.step is not None: raise ValueError('CSRNDArray only supports continuous slicing on axis 0') if key.start is not None or key.stop is not None: begin = key.start if key.start else 0 end = key.stop if key.stop else self.shape[0] return op.slice(self, begin=begin, end=end) else: return self if isinstance(key, tuple): raise ValueError('Multi-dimension indexing is not supported')
[docs] def __setitem__(self, key, value): """x.__setitem__(i, y) <=> x[i]=y Set self[key] to value. Only slice key [:] is supported. Parameters ---------- key : slice The indexing key. value : NDArray or CSRNDArray or numpy.ndarray The value to set. Examples -------- >>> src = mx.nd.sparse.zeros('csr', (3,3)) >>> src.asnumpy() array([[ 0., 0., 0.], [ 0., 0., 0.], [ 0., 0., 0.]], dtype=float32) >>> # assign CSRNDArray with same storage type >>> x = mx.nd.ones('row_sparse', (3,3)).tostype('csr') >>> x[:] = src >>> x.asnumpy() array([[ 1., 1., 1.], [ 1., 1., 1.], [ 1., 1., 1.]], dtype=float32) >>> # assign NDArray to CSRNDArray >>> x[:] = mx.nd.ones((3,3)) * 2 >>> x.asnumpy() array([[ 2., 2., 2.], [ 2., 2., 2.], [ 2., 2., 2.]], dtype=float32) """ if not self.writable: raise ValueError('Failed to assign to a readonly CSRNDArray') if isinstance(key, py_slice): if key.step is not None or key.start is not None or key.stop is not None: raise ValueError('Assignment with slice for CSRNDArray is not ' \ 'implmented yet.') if isinstance(value, NDArray): # avoid copying to itself if value.handle is not self.handle: value.copyto(self) elif isinstance(value, numeric_types): raise ValueError("Assigning numeric types to CSRNDArray is " \ "not implemented yet.") elif isinstance(value, (np.ndarray, np.generic)): # TODO(haibin/anisub) check scipy.sparse and use _sync_copy_from to # avoid the temporary copy warnings.warn('Assigning non-NDArray object to CSRNDArray is not efficient', RuntimeWarning) tmp = _array(value) tmp.copyto(self) else: raise TypeError('type %s not supported' % str(type(value))) else: assert(isinstance(key, (int, tuple))) raise Exception('CSRNDArray only supports [:] for assignment')
@property def indices(self): """A deep copy NDArray of the indices array of the CSRNDArray. This generates a deep copy of the column indices of the current `csr` matrix. Returns ------- NDArray This CSRNDArray's indices array. """ return self._aux_data(1) @property def indptr(self): """A deep copy NDArray of the indptr array of the CSRNDArray. This generates a deep copy of the `indptr` of the current `csr` matrix. Returns ------- NDArray This CSRNDArray's indptr array. """ return self._aux_data(0) @property def data(self): """A deep copy NDArray of the data array of the CSRNDArray. This generates a deep copy of the `data` of the current `csr` matrix. Returns ------- NDArray This CSRNDArray's data array. """ return self._data() @indices.setter def indices(self, indices): raise NotImplementedError() @indptr.setter def indptr(self, indptr): raise NotImplementedError() @data.setter def data(self, data): raise NotImplementedError()
[docs] def tostype(self, stype): """Return a copy of the array with chosen storage type. Returns ------- NDArray or CSRNDArray A copy of the array with the chosen storage stype """ if stype == 'row_sparse': raise ValueError("cast_storage from csr to row_sparse is not supported") return op.cast_storage(self, stype=stype)
[docs] def copyto(self, other): """Copies the value of this array to another array. If ``other`` is a ``NDArray`` or ``CSRNDArray`` object, then ``other.shape`` and ``self.shape`` should be the same. This function copies the value from ``self`` to ``other``. If ``other`` is a context, a new ``CSRNDArray`` will be first created on the target context, and the value of ``self`` is copied. Parameters ---------- other : NDArray or CSRNDArray or Context The destination array or context. Returns ------- NDArray or CSRNDArray The copied array. If ``other`` is an ``NDArray`` or ``CSRNDArray``, then the return value and ``other`` will point to the same ``NDArray`` or ``CSRNDArray``. """ if isinstance(other, Context): return super(CSRNDArray, self).copyto(other) elif isinstance(other, NDArray): stype = other.stype if stype == 'default' or stype == 'csr': return super(CSRNDArray, self).copyto(other) else: raise TypeError('copyto does not support destination NDArray stype ' + str(stype)) else: raise TypeError('copyto does not support type ' + str(type(other)))
[docs] def asscipy(self): """Returns a ``scipy.sparse.csr.csr_matrix`` object with value copied from this array Examples -------- >>> x = mx.nd.sparse.zeros('csr', (2,3)) >>> y = x.asscipy() >>> type(y) >>> y <2x3 sparse matrix of type '' with 0 stored elements in Compressed Sparse Row format> """ data = self.data.asnumpy() indices = self.indices.asnumpy() indptr = self.indptr.asnumpy() if not spsp: raise ImportError("scipy is not available. \ Please check if the scipy python bindings are installed.") return spsp.csr_matrix((data, indices, indptr), shape=self.shape, dtype=self.dtype)
# pylint: disable=abstract-method
[docs]class RowSparseNDArray(BaseSparseNDArray): """A sparse representation of a set of NDArray row slices at given indices. A RowSparseNDArray represents a multidimensional NDArray using two separate arrays: `data` and `indices`. The number of dimensions has to be at least 2. - data: an NDArray of any dtype with shape [D0, D1, ..., Dn]. - indices: a 1-D int64 NDArray with shape [D0] with values sorted in ascending order. The `indices` stores the indices of the row slices with non-zeros, while the values are stored in `data`. The corresponding NDArray ``dense`` represented by RowSparseNDArray ``rsp`` has ``dense[rsp.indices[i], :, :, :, ...] = rsp.data[i, :, :, :, ...]`` >>> dense.asnumpy() array([[ 1., 2., 3.], [ 0., 0., 0.], [ 4., 0., 5.], [ 0., 0., 0.], [ 0., 0., 0.]], dtype=float32) >>> rsp = dense.tostype('row_sparse') >>> rsp.indices.asnumpy() array([0, 2], dtype=int64) >>> rsp.data.asnumpy() array([[ 1., 2., 3.], [ 4., 0., 5.]], dtype=float32) A RowSparseNDArray is typically used to represent non-zero row slices of a large NDArray of shape [LARGE0, D1, .. , Dn] where LARGE0 >> D0 and most row slices are zeros. RowSparseNDArray is used principally in the definition of gradients for operations that have sparse gradients (e.g. sparse dot and sparse embedding). See Also -------- row_sparse_array: Several ways to construct a RowSparseNDArray """ def __reduce__(self): return RowSparseNDArray, (None,), super(RowSparseNDArray, self).__getstate__() def __iadd__(self, other): (self + other).copyto(self) return self def __isub__(self, other): (self - other).copyto(self) return self def __imul__(self, other): (self * other).copyto(self) return self def __idiv__(self, other): (self / other).copyto(self) return self def __itruediv__(self, other): (self / other).copyto(self) return self
[docs] def __getitem__(self, key): """x.__getitem__(i) <=> x[i] Returns a sliced view of this array. Parameters ---------- key : slice Indexing key. Examples -------- >>> x = mx.nd.sparse.zeros('row_sparse', (2, 3)) >>> x[:].asnumpy() array([[ 0., 0., 0.], [ 0., 0., 0.]], dtype=float32) """ if isinstance(key, int): raise Exception("__getitem__ with int key is not implemented for RowSparseNDArray yet") if isinstance(key, py_slice): if key.step is not None or key.start is not None or key.stop is not None: raise Exception('RowSparseNDArray only supports [:] for __getitem__') else: return self if isinstance(key, tuple): raise ValueError('Multi-dimension indexing is not supported')
[docs] def __setitem__(self, key, value): """x.__setitem__(i, y) <=> x[i]=y Set self[key] to value. Only slice key [:] is supported. Parameters ---------- key : slice The indexing key. value : NDArray or numpy.ndarray The value to set. Examples -------- >>> src = mx.nd.row_sparse([[1, 0, 2], [4, 5, 6]], [0, 2], (3,3)) >>> src.asnumpy() array([[ 1., 0., 2.], [ 0., 0., 0.], [ 4., 5., 6.]], dtype=float32) >>> # assign RowSparseNDArray with same storage type >>> x = mx.nd.sparse.zeros('row_sparse', (3,3)) >>> x[:] = src >>> x.asnumpy() array([[ 1., 0., 2.], [ 0., 0., 0.], [ 4., 5., 6.]], dtype=float32) >>> # assign NDArray to RowSparseNDArray >>> x[:] = mx.nd.ones((3,3)) >>> x.asnumpy() array([[ 1., 1., 1.], [ 1., 1., 1.], [ 1., 1., 1.]], dtype=float32) """ if not self.writable: raise ValueError('Failed to assign to a readonly RowSparseNDArray') if isinstance(key, py_slice): if key.step is not None or key.start is not None or key.stop is not None: raise ValueError('Assignment with slice for RowSparseNDArray ' \ 'is not implmented yet.') if isinstance(value, NDArray): # avoid copying to itself if value.handle is not self.handle: value.copyto(self) elif isinstance(value, numeric_types): _internal._set_value(float(value), out=self) elif isinstance(value, (np.ndarray, np.generic)): warnings.warn('Assigning non-NDArray object to RowSparseNDArray is not efficient', RuntimeWarning) tmp = _array(value) tmp.copyto(self) else: raise TypeError('type %s not supported' % str(type(value))) else: assert(isinstance(key, (int, tuple))) raise TypeError('RowSparseNDArray only supports [:] for assignment')
@property def indices(self): """A deep copy NDArray of the indices array of the RowSparseNDArray. This generates a deep copy of the row indices of the current `row_sparse` matrix. Returns ------- NDArray This RowSparseNDArray's indices array. """ return self._aux_data(0) @property def data(self): """A deep copy NDArray of the data array of the RowSparseNDArray. This generates a deep copy of the `data` of the current `row_sparse` matrix. Returns ------- NDArray This RowSparseNDArray's data array. """ return self._data() @indices.setter def indices(self, indices): raise NotImplementedError() @data.setter def data(self, data): raise NotImplementedError()
[docs] def tostype(self, stype): """Return a copy of the array with chosen storage type. Returns ------- NDArray or RowSparseNDArray A copy of the array with the chosen storage stype """ if stype == 'csr': raise ValueError("cast_storage from row_sparse to csr is not supported") return op.cast_storage(self, stype=stype)
[docs] def copyto(self, other): """Copies the value of this array to another array. If ``other`` is a ``NDArray`` or ``RowSparseNDArray`` object, then ``other.shape`` and ``self.shape`` should be the same. This function copies the value from ``self`` to ``other``. If ``other`` is a context, a new ``RowSparseNDArray`` will be first created on the target context, and the value of ``self`` is copied. Parameters ---------- other : NDArray or RowSparseNDArray or Context The destination array or context. Returns ------- NDArray or RowSparseNDArray The copied array. If ``other`` is an ``NDArray`` or ``RowSparseNDArray``, then the return value and ``other`` will point to the same ``NDArray`` or ``RowSparseNDArray``. """ if isinstance(other, Context): return super(RowSparseNDArray, self).copyto(other) elif isinstance(other, NDArray): stype = other.stype if stype == 'default' or stype == 'row_sparse': return super(RowSparseNDArray, self).copyto(other) else: raise TypeError('copyto does not support destination NDArray stype ' + str(stype)) else: raise TypeError('copyto does not support type ' + str(type(other)))
[docs] def retain(self, *args, **kwargs): """Convenience fluent method for :py:func:`retain`. The arguments are the same as for :py:func:`retain`, with this array as data. """ return retain(self, *args, **kwargs)
def _prepare_src_array(source_array, dtype): """Prepare `source_array` so that it can be used to construct NDArray. `source_array` is converted to a `np.ndarray` if it's neither an `NDArray` \ nor an `np.ndarray`. """ if not isinstance(source_array, NDArray) and not isinstance(source_array, np.ndarray): try: source_array = np.array(source_array, dtype=dtype) except: raise TypeError('values must be array like object') return source_array def _prepare_default_dtype(src_array, dtype): """Prepare the value of dtype if `dtype` is None. If `src_array` is an NDArray, numpy.ndarray or scipy.sparse.csr.csr_matrix, return src_array.dtype. float32 is returned otherwise.""" if dtype is None: if isinstance(src_array, (NDArray, np.ndarray)): dtype = src_array.dtype elif spsp and isinstance(src_array, spsp.csr.csr_matrix): dtype = src_array.dtype else: dtype = mx_real_t return dtype def _check_shape(s1, s2): """check s1 == s2 if both are not None""" if s1 and s2 and s1 != s2: raise ValueError("Shape mismatch detected. " + str(s1) + " v.s. " + str(s2))
[docs]def csr_matrix(arg1, shape=None, ctx=None, dtype=None): """Creates a `CSRNDArray`, an 2D array with compressed sparse row (CSR) format. The CSRNDArray can be instantiated in several ways: - csr_matrix(D): to construct a CSRNDArray with a dense 2D array ``D`` - **D** (*array_like*) - An object exposing the array interface, an object whose \ `__array__` method returns an array, or any (nested) sequence. - **ctx** (*Context, optional*) - Device context \ (default is the current default context). - **dtype** (*str or numpy.dtype, optional*) - The data type of the output array. \ The default dtype is ``D.dtype`` if ``D`` is an NDArray or numpy.ndarray, \ float32 otherwise. - csr_matrix(S) to construct a CSRNDArray with a sparse 2D array ``S`` - **S** (*CSRNDArray or scipy.sparse.csr.csr_matrix*) - A sparse matrix. - **ctx** (*Context, optional*) - Device context \ (default is the current default context). - **dtype** (*str or numpy.dtype, optional*) - The data type of the output array. \ The default dtype is ``S.dtype``. - csr_matrix((M, N)) to construct an empty CSRNDArray with shape ``(M, N)`` - **M** (*int*) - Number of rows in the matrix - **N** (*int*) - Number of columns in the matrix - **ctx** (*Context, optional*) - Device context \ (default is the current default context). - **dtype** (*str or numpy.dtype, optional*) - The data type of the output array. \ The default dtype is float32. - csr_matrix((data, indices, indptr)) to construct a CSRNDArray based on the definition of compressed sparse row format \ using three separate arrays, \ where the column indices for row i are stored in ``indices[indptr[i]:indptr[i+1]]`` \ and their corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``. \ The column indices for a given row are expected to be **sorted in ascending order.** \ Duplicate column entries for the same row are not allowed. - **data** (*array_like*) - An object exposing the array interface, which \ holds all the non-zero entries of the matrix in row-major order. - **indices** (*array_like*) - An object exposing the array interface, which \ stores the column index for each non-zero element in ``data``. - **indptr** (*array_like*) - An object exposing the array interface, which \ stores the offset into ``data`` of the first non-zero element number of each \ row of the matrix. - **shape** (*tuple of int, optional*) - The shape of the array. The default \ shape is inferred from the indices and indptr arrays. - **ctx** (*Context, optional*) - Device context \ (default is the current default context). - **dtype** (*str or numpy.dtype, optional*) - The data type of the output array. \ The default dtype is ``data.dtype`` if ``data`` is an NDArray or numpy.ndarray, \ float32 otherwise. - csr_matrix((data, (row, col))) to construct a CSRNDArray based on the COOrdinate format \ using three seperate arrays, \ where ``row[i]`` is the row index of the element, \ ``col[i]`` is the column index of the element \ and ``data[i]`` is the data corresponding to the element. All the missing \ elements in the input are taken to be zeroes. - **data** (*array_like*) - An object exposing the array interface, which \ holds all the non-zero entries of the matrix in COO format. - **row** (*array_like*) - An object exposing the array interface, which \ stores the row index for each non zero element in ``data``. - **col** (*array_like*) - An object exposing the array interface, which \ stores the col index for each non zero element in ``data``. - **shape** (*tuple of int, optional*) - The shape of the array. The default \ shape is inferred from the ``row`` and ``col`` arrays. - **ctx** (*Context, optional*) - Device context \ (default is the current default context). - **dtype** (*str or numpy.dtype, optional*) - The data type of the output array. \ The default dtype is float32. Parameters ---------- arg1: tuple of int, tuple of array_like, array_like, CSRNDArray, scipy.sparse.csr_matrix, \ scipy.sparse.coo_matrix, tuple of int or tuple of array_like The argument to help instantiate the csr matrix. See above for further details. shape : tuple of int, optional The shape of the csr matrix. ctx: Context, optional Device context (default is the current default context). dtype: str or numpy.dtype, optional The data type of the output array. Returns ------- CSRNDArray A `CSRNDArray` with the `csr` storage representation. Example ------- >>> a = mx.nd.sparse.csr_matrix(([1, 2, 3], [1, 0, 2], [0, 1, 2, 2, 3]), shape=(4, 3)) >>> a.asnumpy() array([[ 0., 1., 0.], [ 2., 0., 0.], [ 0., 0., 0.], [ 0., 0., 3.]], dtype=float32) See Also -------- CSRNDArray : MXNet NDArray in compressed sparse row format. """ # construct a csr matrix from (M, N) or (data, indices, indptr) if isinstance(arg1, tuple): arg_len = len(arg1) if arg_len == 2: # construct a sparse csr matrix from # scipy coo matrix if input format is coo if isinstance(arg1[1], tuple) and len(arg1[1]) == 2: data, (row, col) = arg1 if isinstance(data, NDArray): data = data.asnumpy() if isinstance(row, NDArray): row = row.asnumpy() if isinstance(col, NDArray): col = col.asnumpy() coo = spsp.coo_matrix((data, (row, col)), shape=shape) _check_shape(coo.shape, shape) csr = coo.tocsr() return array(csr, ctx=ctx, dtype=dtype) else: # empty matrix with shape _check_shape(arg1, shape) return empty('csr', arg1, ctx=ctx, dtype=dtype) elif arg_len == 3: # data, indices, indptr return _csr_matrix_from_definition(arg1[0], arg1[1], arg1[2], shape=shape, ctx=ctx, dtype=dtype) else: raise ValueError("Unexpected length of input tuple: " + str(arg_len)) else: # construct a csr matrix from a sparse / dense one if isinstance(arg1, CSRNDArray) or (spsp and isinstance(arg1, spsp.csr.csr_matrix)): # construct a csr matrix from scipy or CSRNDArray _check_shape(arg1.shape, shape) return array(arg1, ctx=ctx, dtype=dtype) elif isinstance(arg1, RowSparseNDArray): raise ValueError("Unexpected input type: RowSparseNDArray") else: # construct a csr matrix from a dense one # prepare default ctx and dtype since mx.nd.array doesn't use default values # based on source_array dtype = _prepare_default_dtype(arg1, dtype) # create dns array with provided dtype. ctx is not passed since copy across # ctx requires dtype to be the same dns = _array(arg1, dtype=dtype) if ctx is not None and dns.context != ctx: dns = dns.as_in_context(ctx) _check_shape(dns.shape, shape) return dns.tostype('csr')
def _csr_matrix_from_definition(data, indices, indptr, shape=None, ctx=None, dtype=None, indices_type=None, indptr_type=None): """Create a `CSRNDArray` based on data, indices and indptr""" storage_type = 'csr' # context ctx = Context.default_ctx if ctx is None else ctx # types dtype = _prepare_default_dtype(data, dtype) indptr_type = _STORAGE_AUX_TYPES[storage_type][0] if indptr_type is None else indptr_type indices_type = _STORAGE_AUX_TYPES[storage_type][1] if indices_type is None else indices_type # prepare src array and types data = _prepare_src_array(data, dtype) indptr = _prepare_src_array(indptr, indptr_type) indices = _prepare_src_array(indices, indices_type) # TODO(junwu): Convert data, indptr, and indices to mxnet NDArrays # if they are not for now. In the future, we should provide a c-api # to accept np.ndarray types to copy from to result.data and aux_data if not isinstance(data, NDArray): data = _array(data, ctx, dtype) if not isinstance(indptr, NDArray): indptr = _array(indptr, ctx, indptr_type) if not isinstance(indices, NDArray): indices = _array(indices, ctx, indices_type) if shape is None: if indices.shape[0] == 0: raise ValueError('invalid shape') shape = (len(indptr) - 1, op.max(indices).asscalar() + 1) # verify shapes aux_shapes = [indptr.shape, indices.shape] if data.ndim != 1 or indptr.ndim != 1 or indices.ndim != 1 or \ indptr.shape[0] == 0 or len(shape) != 2: raise ValueError('invalid shape') result = CSRNDArray(_new_alloc_handle(storage_type, shape, ctx, False, dtype, [indptr_type, indices_type], aux_shapes)) check_call(_LIB.MXNDArraySyncCopyFromNDArray(result.handle, data.handle, ctypes.c_int(-1))) check_call(_LIB.MXNDArraySyncCopyFromNDArray(result.handle, indptr.handle, ctypes.c_int(0))) check_call(_LIB.MXNDArraySyncCopyFromNDArray(result.handle, indices.handle, ctypes.c_int(1))) return result
[docs]def row_sparse_array(arg1, shape=None, ctx=None, dtype=None): """Creates a `RowSparseNDArray`, a multidimensional row sparse array with a set of \ tensor slices at given indices. The RowSparseNDArray can be instantiated in several ways: - row_sparse_array(D): to construct a RowSparseNDArray with a dense ndarray ``D`` - **D** (*array_like*) - An object exposing the array interface, an object whose \ `__array__` method returns an array, or any (nested) sequence. - **ctx** (*Context, optional*) - Device context \ (default is the current default context). - **dtype** (*str or numpy.dtype, optional*) - The data type of the output array. \ The default dtype is ``D.dtype`` if ``D`` is an NDArray or numpy.ndarray, \ float32 otherwise. - row_sparse_array(S) to construct a RowSparseNDArray with a sparse ndarray ``S`` - **S** (*RowSparseNDArray*) - A sparse ndarray. - **ctx** (*Context, optional*) - Device context \ (default is the current default context). - **dtype** (*str or numpy.dtype, optional*) - The data type of the output array. \ The default dtype is ``S.dtype``. - row_sparse_array((D0, D1 .. Dn)) to construct an empty RowSparseNDArray with shape ``(D0, D1, ... Dn)`` - **D0, D1 .. Dn** (*int*) - The shape of the ndarray - **ctx** (*Context, optional*) - Device context \ (default is the current default context). - **dtype** (*str or numpy.dtype, optional*) - The data type of the output array. \ The default dtype is float32. - row_sparse_array((data, indices)) to construct a RowSparseNDArray based on the definition of row sparse format \ using two separate arrays, \ where the `indices` stores the indices of the row slices with non-zeros, while the values are stored in `data`. The corresponding NDArray ``dense`` represented by RowSparseNDArray ``rsp`` has \ ``dense[rsp.indices[i], :, :, :, ...] = rsp.data[i, :, :, :, ...]`` The row indices for are expected to be **sorted in ascending order.** \ - **data** (*array_like*) - An object exposing the array interface, which \ holds all the non-zero row slices of the array. - **indices** (*array_like*) - An object exposing the array interface, which \ stores the row index for each row slice with non-zero elements. - **shape** (*tuple of int, optional*) - The shape of the array. The default \ shape is inferred from the indices and indptr arrays. - **ctx** (*Context, optional*) - Device context \ (default is the current default context). - **dtype** (*str or numpy.dtype, optional*) - The data type of the output array. \ The default dtype is float32. Parameters ---------- arg1: NDArray, numpy.ndarray, RowSparseNDArray, tuple of int or tuple of array_like The argument to help instantiate the row sparse ndarray. See above for further details. shape : tuple of int, optional The shape of the row sparse ndarray. ctx : Context, optional Device context (default is the current default context). dtype : str or numpy.dtype, optional The data type of the output array. Returns ------- RowSparseNDArray An `RowSparseNDArray` with the `row_sparse` storage representation. Example ------- >>> a = mx.nd.sparse.row_sparse_array(([[1, 2], [3, 4]], [1, 4]), shape=(6, 2)) >>> a.asnumpy() array([[ 0., 0.], [ 1., 2.], [ 0., 0.], [ 0., 0.], [ 3., 4.], [ 0., 0.]], dtype=float32) See Also -------- RowSparseNDArray : MXNet NDArray in row sparse format. """ # construct a row sparse array from (D0, D1 ..) or (data, indices) if isinstance(arg1, tuple): arg_len = len(arg1) if arg_len < 2: raise ValueError("Unexpected length of input tuple: " + str(arg_len)) elif arg_len > 2: # empty ndarray with shape _check_shape(arg1, shape) return empty('row_sparse', arg1, ctx=ctx, dtype=dtype) else: # len(arg1) = 2, is either shape or (data, indices) if isinstance(arg1[0], integer_types) and isinstance(arg1[1], integer_types): # empty ndarray with shape _check_shape(arg1, shape) return empty('row_sparse', arg1, ctx=ctx, dtype=dtype) else: # data, indices, indptr return _row_sparse_ndarray_from_definition(arg1[0], arg1[1], shape=shape, ctx=ctx, dtype=dtype) else: # construct a row sparse ndarray from a dense / sparse array if isinstance(arg1, RowSparseNDArray): # construct a row sparse ndarray from RowSparseNDArray _check_shape(arg1.shape, shape) return array(arg1, ctx=ctx, dtype=dtype) elif isinstance(arg1, CSRNDArray): raise ValueError("Unexpected input type: CSRNDArray") else: # construct a csr matrix from a dense one # prepare default dtype since mx.nd.array doesn't use default values # based on source_array dtype = _prepare_default_dtype(arg1, dtype) # create dns array with provided dtype. ctx is not passed since copy across # ctx requires dtype to be the same dns = _array(arg1, dtype=dtype) if ctx is not None and dns.context != ctx: dns = dns.as_in_context(ctx) _check_shape(dns.shape, shape) return dns.tostype('row_sparse')
def _row_sparse_ndarray_from_definition(data, indices, shape=None, ctx=None, dtype=None, indices_type=None): """Create a `RowSparseNDArray` based on data and indices""" storage_type = 'row_sparse' # context ctx = Context.default_ctx if ctx is None else ctx # types dtype = _prepare_default_dtype(data, dtype) indices_type = _STORAGE_AUX_TYPES[storage_type][0] if indices_type is None else indices_type # prepare src array and types data = _prepare_src_array(data, dtype) indices = _prepare_src_array(indices, indices_type) # TODO(junwu): Convert data, indptr, and indices to mxnet NDArrays # if they are not for now. In the future, we should provide a c-api # to accept np.ndarray types to copy from to result.data and aux_data if not isinstance(data, NDArray): data = _array(data, ctx, dtype) if not isinstance(indices, NDArray): indices = _array(indices, ctx, indices_type) if shape is None: num_indices = indices.shape[0] if num_indices == 0: raise ValueError('invalid shape') dim0 = indices[num_indices - 1].asscalar() + 1 shape = (dim0, ) + data.shape[1:] # verify shapes if data.ndim != len(shape) or indices.ndim != 1 or np.prod(shape[1:]) == 0: raise ValueError("invalid shape") result = RowSparseNDArray(_new_alloc_handle(storage_type, shape, ctx, False, dtype, [indices_type], [indices.shape])) check_call(_LIB.MXNDArraySyncCopyFromNDArray(result.handle, data.handle, ctypes.c_int(-1))) check_call(_LIB.MXNDArraySyncCopyFromNDArray(result.handle, indices.handle, ctypes.c_int(0))) return result def _ndarray_cls(handle, writable=True, stype=_STORAGE_TYPE_UNDEFINED): if stype == _STORAGE_TYPE_UNDEFINED: stype = _storage_type(handle) if stype == _STORAGE_TYPE_DEFAULT: return NDArray(handle, writable=writable) elif stype == _STORAGE_TYPE_CSR: return CSRNDArray(handle, writable=writable) elif stype == _STORAGE_TYPE_ROW_SPARSE: return RowSparseNDArray(handle, writable=writable) else: raise Exception("unknown storage type: %s"%stype) _set_ndarray_class(_ndarray_cls)
[docs]def zeros(stype, shape, ctx=None, dtype=None, **kwargs): """Return a new array of given shape and type, filled with zeros. Parameters ---------- stype: string The storage type of the empty array, such as 'row_sparse', 'csr', etc shape : int or tuple of int The shape of the empty array ctx : Context, optional An optional device context (default is the current default context) dtype : str or numpy.dtype, optional An optional value type (default is `float32`) Returns ------- RowSparseNDArray or CSRNDArray A created array Examples -------- >>> mx.nd.sparse.zeros('csr', (1,2)) >>> mx.nd.sparse.zeros('row_sparse', (1,2), ctx=mx.cpu(), dtype='float16').asnumpy() array([[ 0., 0.]], dtype=float16) """ if stype == 'default': return _zeros_ndarray(shape, ctx=ctx, dtype=dtype, **kwargs) if ctx is None: ctx = Context.default_ctx dtype = mx_real_t if dtype is None else dtype if stype == 'row_sparse' or stype == 'csr': aux_types = _STORAGE_AUX_TYPES[stype] else: raise ValueError("unknown storage type" + stype) out = _ndarray_cls(_new_alloc_handle(stype, shape, ctx, True, dtype, aux_types)) return _internal._zeros(shape=shape, ctx=ctx, dtype=dtype, out=out, **kwargs)
[docs]def empty(stype, shape, ctx=None, dtype=None): """Returns a new array of given shape and type, without initializing entries. Parameters ---------- stype: string The storage type of the empty array, such as 'row_sparse', 'csr', etc shape : int or tuple of int The shape of the empty array. ctx : Context, optional An optional device context (default is the current default context). dtype : str or numpy.dtype, optional An optional value type (default is `float32`). Returns ------- CSRNDArray or RowSparseNDArray A created array. """ if isinstance(shape, int): shape = (shape, ) if ctx is None: ctx = Context.default_ctx if dtype is None: dtype = mx_real_t assert(stype is not None) if stype == 'csr' or stype == 'row_sparse': return zeros(stype, shape, ctx=ctx, dtype=dtype) else: raise Exception("unknown stype : " + str(stype))
[docs]def array(source_array, ctx=None, dtype=None): """Creates a sparse array from any object exposing the array interface. Parameters ---------- source_array : RowSparseNDArray, CSRNDArray or scipy.sparse.csr.csr_matrix The source sparse array ctx : Context, optional The default context is ``source_array.context`` if ``source_array`` is an NDArray. \ The current default context otherwise. dtype : str or numpy.dtype, optional The data type of the output array. The default dtype is ``source_array.dtype`` if `source_array` is an `NDArray`, `numpy.ndarray` or `scipy.sparse.csr.csr_matrix`, \ `float32` otherwise. Returns ------- RowSparseNDArray or CSRNDArray An array with the same contents as the `source_array`. Examples -------- >>> import scipy.sparse as spsp >>> csr = spsp.csr_matrix((2, 100)) >>> mx.nd.sparse.array(csr) >>> mx.nd.sparse.array(mx.nd.sparse.zeros('csr', (3, 2))) >>> mx.nd.sparse.array(mx.nd.sparse.zeros('row_sparse', (3, 2))) """ ctx = Context.default_ctx if ctx is None else ctx if isinstance(source_array, NDArray): assert(source_array.stype != 'default'), \ "Please use `tostype` to create RowSparseNDArray or CSRNDArray from an NDArray" # prepare dtype and ctx based on source_array, if not provided dtype = _prepare_default_dtype(source_array, dtype) # if both dtype and ctx are different from source_array, we cannot copy directly if source_array.dtype != dtype and source_array.context != ctx: arr = empty(source_array.stype, source_array.shape, dtype=dtype) arr[:] = source_array arr = arr.as_in_context(ctx) else: arr = empty(source_array.stype, source_array.shape, dtype=dtype, ctx=ctx) arr[:] = source_array return arr elif spsp and isinstance(source_array, spsp.csr.csr_matrix): # TODO(haibin) implement `_sync_copy_from` with scipy csr object to reduce a copy # preprocess scipy csr to canonical form csr = source_array.sorted_indices() csr.sum_duplicates() dtype = _prepare_default_dtype(source_array, dtype) return csr_matrix((csr.data, csr.indices, csr.indptr), shape=csr.shape, \ dtype=dtype, ctx=ctx) elif isinstance(source_array, (np.ndarray, np.generic)): raise ValueError("Please use mx.nd.array to create an NDArray with source_array of type ", type(source_array)) else: raise ValueError("Unexpected source_array type: ", type(source_array))