#ifndef __CUV_NDARRAY_HPP__
#define __CUV_NDARRAY_HPP__

#include <algorithm>
#include <functional>
#include <iostream>
#include <limits>
#include <numeric>
#include <stdexcept>
#include <string>
#include <vector>

#include <boost/make_shared.hpp>
#include <boost/multi_array/extent_gen.hpp>
#include <boost/multi_array/index_gen.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/type_traits/is_convertible.hpp>
#include <boost/utility/enable_if.hpp>

#include "allocators.hpp"
#include "meta_programming.hpp"

namespace cuv {
/// report a failed cuvAssert by throwing a std::runtime_error that carries
/// the stringified expression
static inline void cuvAssertFailed(const char* msg) {
    throw std::runtime_error(std::string(msg));
}

/**
 * Assertion macro: if X evaluates to false, an exception carrying the
 * expression text is thrown. It stays active in release builds.
 */
#define cuvAssert(X) \
    if (!(X)) { cuv::cuvAssertFailed(#X); }
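// Example (sketch): unlike assert(3), cuvAssert also fires with NDEBUG set
// and reports the failed expression text in the exception message.
//
//     cuvAssert(ptr != NULL);   // throws std::runtime_error("ptr != NULL") if ptr is NULL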
using boost::detail::multi_array::extent_gen;
using boost::detail::multi_array::index_gen;

/// the index-range type used in indices[...] expressions
typedef boost::detail::multi_array::index_range<boost::detail::multi_array::index,
        boost::detail::multi_array::size_type> index_range;
/// the index type used in indices[...] expressions
typedef index_range::index index;

#ifndef CUV_DONT_CREATE_EXTENTS_OBJ
/// convenience object: shapes can be written as extents[4][5]
extent_gen<0> extents;
/// convenience object: ranges can be written as indices[index_range(0, 3)]
index_gen<0, 0> indices;
#endif
template<class V, class M, class L>
class ndarray;

template<class V, class M, class L>
class ndarray_view;
/// set every element of v to p (declared here, defined elsewhere)
template<class V, class M, class L>
void fill(ndarray<V, M, L>& v, const V& p);
namespace detail {

/**
 * Determine rows, columns and pitch of an ndarray from its shape and strides;
 * used for copying pitched memory. For row_major layout, the contiguous
 * (pitched) dimension is the last one.
 */
template<class index_type, class size_type>
void get_pitched_params(size_type& rows, size_type& cols, size_type& pitch,
        const linear_memory<size_type, host_memory_space>& shape,
        const linear_memory<index_type, host_memory_space>& stride, row_major) {
    // rows: product of all dimensions except the last one
    rows = std::accumulate(shape.ptr(), shape.ptr() + shape.size() - 1,
            size_type(1), std::multiplies<size_type>());
    cols = shape[shape.size() - 1];
    pitch = stride[shape.size() - 2];
}
/**
 * Column-major counterpart of the overload above: here the contiguous
 * (pitched) dimension is the first one. (The cols and pitch assignments are
 * reconstructed by symmetry with the row_major overload.)
 */
template<class index_type, class size_type>
void get_pitched_params(size_type& rows, size_type& cols, size_type& pitch,
        const linear_memory<size_type, host_memory_space>& shape,
        const linear_memory<index_type, host_memory_space>& stride, column_major) {
    // rows: product of all dimensions except the first one
    rows = std::accumulate(shape.ptr() + 1, shape.ptr() + shape.size(),
            size_type(1), std::multiplies<size_type>());
    cols = shape[0];
    pitch = stride[1];
}

}  // namespace detail
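// Example (sketch): for a dense array of shape (2, 3, 4), the row_major
// overload (element strides (12, 4, 1)) yields rows = 2*3 = 6, cols = 4 and
// pitch = stride[1] = 4, while the column_major overload (element strides
// (1, 2, 6)) yields rows = 3*4 = 12, cols = 2 and pitch = stride[1] = 2.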
/**
 * Shape/stride meta-data of an ndarray. (The class name is assumed from its
 * use as m_info/info() further below; most members are elided here.)
 */
template<class M, class L>
class ndarray_info {
public:
    boost::shared_ptr<allocator> m_allocator;
    // ... (host_shape and host_stride members, accessed below via
    //      info().host_shape and info().host_stride)
};
/**
 * An n-dimensional array in memory space M (host or device), with value type
 * V and memory layout L (row_major by default).
 */
template<class V, class M, class L = row_major>
class ndarray {
public:
    // ... (typedefs value_type, size_type, index_type, memory_type, reference,
    //      const_reference and the members m_info, m_memory, m_ptr are
    //      declared in elided code)
    boost::shared_ptr<allocator> m_allocator;
    /// throw if size cannot be represented by index_type
    void check_size_limit(size_t size) const {
        if (size > static_cast<size_t>(std::numeric_limits<index_type>::max())) {
            throw std::runtime_error("maximum ndarray size exceeded");
        }
    }
    // befriend all other ndarray instantiations, as needed for cross-space and
    // cross-layout copies (the friend target is elided in the original)
    template<class _V, class _M, class _L>
    friend class ndarray;
    // fragments of elided shape/stride helpers, each iterating over the D dimensions:
    //     for (int i = 0; i < D; i++) { ... }
    //     for (size_t i = 0; i < D; i++) { ... }
    /// @return the memory block backing this ndarray
    boost::shared_ptr<memory_type>& mem() {
        return m_memory;
    }

    /// @return the memory block backing this ndarray (const version)
    const boost::shared_ptr<memory_type>& mem() const {
        return m_memory;
    }
    /// @return the number of elements, range-checked against index_type
    size_type size() const {
        std::vector<size_type> sh = shape();
        size_t size = std::accumulate(sh.begin(), sh.end(), (size_t)1,
                std::multiplies<size_t>());
        check_size_limit(size);
        return static_cast<size_type>(size);
    }

    /// @return the size of the memory in bytes (name and most of the body are
    /// reconstructed; only the accumulate/check pattern survives here)
    size_t memsize() const {
        std::vector<size_type> sh = shape();
        size_t size = sizeof(value_type) * std::accumulate(sh.begin(), sh.end(),
                (size_type)1, std::multiplies<size_type>());
        check_size_limit(size);
        return size;
    }
    /// @return the shape as a vector (empty for a 0-dimensional ndarray)
    std::vector<size_type> shape() const {
        if (ndim() == 0)
            return std::vector<size_type>();
        // ... (otherwise copy the ndim() entries of the stored host shape)
    }
    /// @return the shape with all singleton (size 1) dimensions removed
    std::vector<size_type> effective_shape() const {
        std::vector<size_type> shape;
        shape.reserve(ndim());
        // copy every dimension whose size is not 1:
        std::remove_copy_if(/* begin */, /* end of the stored host shape */,
                std::back_inserter(shape), std::bind2nd(std::equal_to<size_type>(), 1));
        return shape;
    }
    /*
     * Linear-index access (the enclosing operator[] signature is elided):
     * build "virtual" dense strides for the current shape, then peel one
     * coordinate per dimension off idx and apply the real strides.
     */
        // row_major: virtual strides grow from the last dimension backwards
        for (int i = ndim - 1; i >= 0; --i) {
            virtualstride[i] = virt_size;
            // ... (virt_size *= size of dimension i)
        }
        for (/* each dimension i, front to back */) {
            // ... (advance the real offset by (idx / virtualstride[i]) * stride of dimension i)
            idx -= (idx / virtualstride[i]) * virtualstride[i];
        }

        // column_major: virtual strides grow from the first dimension onwards
        for (unsigned int i = 0; i < ndim; ++i) {
            virtualstride[i] = virt_size;
            // ...
        }
        for (int i = ndim - 1; i >= 0; --i) {
            // ...
            idx -= (idx / virtualstride[i]) * virtualstride[i];
        }
        delete[] virtualstride;
    /// access by linear index; the const version delegates to the non-const one
    const_reference operator[](index_type idx) const { return const_cast<ndarray&>(*this)[idx]; }

    // n-dimensional element access: each non-const overload checks the
    // dimensionality, the const overloads delegate via const_cast
    reference operator()(index_type i0) { cuvAssert(ndim() == 1); /* ... */ }
    const_reference operator()(index_type i0) const { return const_cast<ndarray&>(*this)(i0); }

    const_reference operator()(index_type i0, index_type i1) const { return const_cast<ndarray&>(*this)(i0, i1); }
    reference operator()(index_type i0, index_type i1) { cuvAssert(ndim() == 2); /* ... */ }

    const_reference operator()(index_type i0, index_type i1, index_type i2) const { return const_cast<ndarray&>(*this)(i0, i1, i2); }
    reference operator()(index_type i0, index_type i1, index_type i2) { cuvAssert(ndim() == 3); /* ... */ }

    const_reference operator()(index_type i0, index_type i1, index_type i2, index_type i3) const { return const_cast<ndarray&>(*this)(i0, i1, i2, i3); }
    reference operator()(index_type i0, index_type i1, index_type i2, index_type i3) { cuvAssert(ndim() == 4); /* ... */ }

    const_reference operator()(index_type i0, index_type i1, index_type i2, index_type i3, index_type i4) const { return const_cast<ndarray&>(*this)(i0, i1, i2, i3, i4); }
    reference operator()(index_type i0, index_type i1, index_type i2, index_type i3, index_type i4) { cuvAssert(ndim() == 5); /* ... */ }
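    // Example (sketch):
    //
    //     cuv::ndarray<float, cuv::host_memory_space> t(cuv::extents[3][4]);
    //     t(1, 2) = 5.f;            // 2-d access (checks ndim() == 2)
    //     float v = t[1 * 4 + 2];   // the same element via a linear index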
    /// default constructor: an empty ndarray using the given (or default) allocator
    ndarray(const boost::shared_ptr<allocator> _allocator = boost::make_shared<default_allocator>()) :
            m_allocator(_allocator), m_info(_allocator), m_ptr(NULL) {
    }
    /// copy constructor: shares allocator, meta-data and memory with o
    /// (only the allocator initializer survives here; the remaining member
    ///  initializers are reconstructed)
    ndarray(const ndarray& o) :
            m_allocator(o.m_allocator), m_info(o.m_info), m_memory(o.m_memory), m_ptr(o.m_ptr) {
    }

    // six further copy/conversion constructors (e.g. from other memory spaces
    // or layouts) are elided; each begins its initializer list the same way:
    //     m_allocator(o.m_allocator), ...
    /// construct a 1-dimensional ndarray of length i
    /// (the first parameter is reconstructed by analogy with the 2-d constructor below)
    explicit ndarray(const size_type i,
            const boost::shared_ptr<allocator> _allocator = boost::make_shared<default_allocator>()) :
            m_allocator(_allocator), m_info(_allocator), m_ptr(NULL) {
        // ... (record the shape, then allocate)
    }

    /// construct a 2-dimensional ndarray of shape i x j
    explicit ndarray(const size_type i, const int j,
            const boost::shared_ptr<allocator> _allocator = boost::make_shared<default_allocator>()) :
            m_allocator(_allocator), m_info(_allocator), m_ptr(NULL) {
        // ... (record the shape, then allocate)
    }
    /// construct an ndarray with the shape given by an extents[...] expression
    template<int D>
    explicit ndarray(const extent_gen<D>& eg,
            const boost::shared_ptr<allocator> _allocator = boost::make_shared<default_allocator>()) :
            m_allocator(_allocator), m_info(_allocator), m_ptr(NULL) {
        for (size_t i = 0; i < D; i++) {
            // ... (dimension i has size eg.ranges_[i].finish())
        }
        // ... (allocate)
    }

    /// construct an ndarray with the shape given as a vector
    explicit ndarray(const std::vector<size_type>& eg,
            const boost::shared_ptr<allocator> _allocator = boost::make_shared<default_allocator>()) :
            m_allocator(_allocator), m_info(_allocator), m_ptr(NULL) {
        for (size_t i = 0; i < eg.size(); i++) {
            // ... (dimension i has size eg[i])
        }
        // ... (allocate)
    }
    // Two further constructors (signatures elided in this excerpt) follow the
    // same pattern as the two above: forward the allocator, then record one
    // dimension per loop iteration:
    //
    //     ... : m_allocator(_allocator), ... {
    //         for (size_t i = 0; i < eg.size(); i++) { ... }
    //     }
    //     ... : m_allocator(_allocator), ... {
    //         for (size_t i = 0; i < D; i++) { ... }
    //     }
    /**
     * Wrap pre-allocated memory with the shape given by an extents[...]
     * expression; dense strides for layout L are computed on the fly.
     * (The value_type* parameter is reconstructed by analogy with the
     * index_gen constructor below.)
     */
    template<int D>
    explicit ndarray(const extent_gen<D>& eg, value_type* ptr,
            const boost::shared_ptr<allocator> _allocator = boost::make_shared<default_allocator>()) :
            m_allocator(_allocator), m_info(_allocator), m_ptr(ptr) {
        size_t size = 1;
        if (/* layout L is row_major */) {
            // strides grow from the last dimension backwards
            for (int i = D - 1; i >= 0; i--) {
                // ... (shape/stride of dimension i)
                size *= eg.ranges_[i].finish();
            }
        } else {
            // column_major: strides grow from the first dimension onwards
            for (size_t i = 0; i < D; i++) {
                // ... (shape/stride of dimension i)
                size *= eg.ranges_[i].finish();
            }
        }
    }
    /// wrap pre-allocated memory with the shape given as a vector
    /// (the value_type* parameter is reconstructed as above)
    ndarray(const std::vector<size_type>& shape, value_type* ptr,
            const boost::shared_ptr<allocator> _allocator = boost::make_shared<default_allocator>()) :
            m_allocator(_allocator), m_info(_allocator), m_ptr(ptr) {
        unsigned int D = shape.size();
        size_t size = 1;
        if (/* layout L is row_major */) {
            for (int i = D - 1; i >= 0; i--) {
                // ... (shape/stride of dimension i; size *= shape[i])
            }
        } else {
            for (size_t i = 0; i < D; i++) {
                // ... (shape/stride of dimension i; size *= shape[i])
            }
        }
    }
    /// wrap pre-allocated memory with the shape given by an indices[...] expression
    template<int D, int E>
    explicit ndarray(const index_gen<D, E>& idx, value_type* ptr,
            const boost::shared_ptr<allocator> _allocator = boost::make_shared<default_allocator>()) :
            m_allocator(_allocator), m_info(_allocator), m_ptr(ptr) {
        size_t size = 1;
        if (/* layout L is row_major */) {
            for (int i = D - 1; i >= 0; i--) {
                // ... (shape/stride of dimension i)
                size *= idx.ranges_[i].finish();
            }
        } else {
            for (size_t i = 0; i < D; i++) {
                // ... (shape/stride of dimension i)
                size *= idx.ranges_[i].finish();
            }
        }
    }
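    // Example (sketch, relying on the wrap constructor above):
    //
    //     using namespace cuv;
    //     ndarray<float, host_memory_space> a(extents[2][3][4]);   // owns its memory
    //     std::vector<unsigned int> s(2, 16);                      // assuming size_type is unsigned int
    //     ndarray<float, dev_memory_space> b(s);                   // 16x16 array on the device
    //     float buf[6];
    //     ndarray<float, host_memory_space> c(extents[2][3], buf); // wraps buf without copying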
    /// assign from an ndarray with a different memory space or layout; copies the data
    template<class _M, class _L>
    ndarray& operator=(const ndarray<value_type, _M, _L>& o) {
        if (/* copying the memory failed */)
            throw std::runtime_error("copying ndarray did not succeed. Maybe a shape mismatch?");
        return *this;
    }
    /// assign a scalar: fill the ndarray with the given value
    /// (the body is reconstructed; it presumably uses the fill() declared above)
    template<class _V>
    typename boost::enable_if_c<boost::is_convertible<_V, value_type>::value, ndarray&>::type
    operator=(const _V& scalar) {
        fill(*this, scalar);
        return *this;
    }
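    // Example (sketch):
    //
    //     cuv::ndarray<float, cuv::host_memory_space> t(cuv::extents[3][4]);
    //     t = 0.f;   // fills all twelve elements with zero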
    /*
     * Sub-range selection against ndarray o via an indices[...] expression
     * (the enclosing signature is elided). Negative start/finish values count
     * backwards from the end of the respective dimension; degenerate
     * (single-index) ranges drop their dimension from the result.
     */
    template<int D, int E>
    // ...
    {
        std::vector<int> shapes;
        std::vector<int> strides;
        cuvAssert(o.ndim() == D);
        for (size_t i = 0; i < D; i++) {
            int start = idx.ranges_[i].get_start(0);
            int finish = idx.ranges_[i].get_finish(o.shape(i));
            int stride = idx.ranges_[i].stride();
            if (start < 0)
                start += o.shape(i);
            if (finish < 0)
                finish += o.shape(i);
            cuvAssert(finish > start);
            // ...
            if (idx.ranges_[i].is_degenerate()) {
                // ... (dimension is dropped)
            } else {
                shapes.push_back((finish - start) / stride);
                // ... (record the corresponding stride)
            }
        }
        // ...
    }
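    // Example (sketch, assuming the (array, indices) constructor order of the
    // ndarray_view class further below):
    //
    //     using namespace cuv;
    //     ndarray<float, host_memory_space> t(extents[8][6]);
    //     // rows 1..3 (exclusive upper bound) and all columns, sharing t's memory:
    //     ndarray_view<float, host_memory_space> v(t, indices[index_range(1, 4)][index_range()]);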
    /// reshape to the shape given by an extents[...] expression
    /// (the method name and the final call are reconstructed from the
    ///  vector overload that follows)
    template<int D>
    void reshape(const extent_gen<D>& eg) {
        std::vector<size_type> shape(D);
        for (size_t i = 0; i < D; i++)
            shape[i] = eg.ranges_[i].finish();
        reshape(shape);
    }
    /**
     * Reshape in place: the ndarray must be c_contiguous and the new shape
     * must describe the same number of elements.
     */
    void reshape(const std::vector<size_type>& shape) {
        size_type new_size = std::accumulate(shape.begin(), shape.end(), 1,
                std::multiplies<size_type>());
        if (/* the ndarray is not c_contiguous */)
            throw std::runtime_error("cannot reshape: ndarray is not c_contiguous");
        if (size() != new_size)
            throw std::runtime_error("cannot reshape: products do not match");
        if (/* layout L is row_major */) {
            // recompute shape/stride from the last dimension backwards
            for (int i = shape.size() - 1; i >= 0; i--) {
                // ...
            }
        } else {
            // column_major: recompute from the first dimension onwards
            for (size_t i = 0; i < shape.size(); i++) {
                // ...
            }
        }
    }
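    // Example (sketch):
    //
    //     cuv::ndarray<float, cuv::host_memory_space> t(cuv::extents[4][6]);
    //     t.reshape(cuv::extents[2][12]);   // ok: contiguous and 4*6 == 2*12
    //     t.reshape(cuv::extents[5][5]);    // throws: products do not match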
    /**
     * Resize: unlike reshape, this may allocate fresh memory; the contents
     * are not preserved when a new allocation is made.
     */
    void resize(const std::vector<size_type>& shape) {
        size_type new_size = std::accumulate(shape.begin(), shape.end(), 1,
                std::multiplies<size_type>());
        if (/* the current memory can be reused -- condition elided */) {
            // ... (reshape in place)
        } else {
            *this = ndarray(shape, m_allocator);
        }
    }
    /// resize to the shape given by an extents[...] expression
    template<int D>
    void resize(const extent_gen<D>& eg) {
        std::vector<size_type> shape(D);
        for (size_t i = 0; i < D; i++)
            shape[i] = eg.ranges_[i].finish();
        resize(shape);
    }
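    // Example (sketch):
    //
    //     cuv::ndarray<float, cuv::dev_memory_space> t;
    //     t.resize(cuv::extents[128][256]);   // allocates a 128x256 device array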
    /**
     * Copy the contents of src (memory space OM, layout OL) into this
     * ndarray's memory. Three stride patterns are supported: source pitched /
     * destination contiguous, source contiguous / destination pitched, and
     * both pitched; anything else throws. (The enclosing signature and the
     * row/col/pitch declarations are elided; stream stems from the elided
     * parameter list.)
     */
    template<class OM, class OL>
    // ...
        if (/* destination contiguous, source pitched */) {
            // ... (row/col/pitch describe the source)
            m_memory->copy2d_from(m_ptr, src.ptr(), col, pitch, row, col, OM(), stream);
        } else if (/* destination pitched, source contiguous */) {
            detail::get_pitched_params(row, col, pitch, info().host_shape, info().host_stride, L());
            m_memory->copy2d_from(m_ptr, src.ptr(), pitch, col, row, col, OM(), stream);
        } else if (/* both pitched */) {
            // ... (srow/scol/spitch describe the source)
            detail::get_pitched_params(drow, dcol, dpitch, info().host_shape, info().host_stride, L());
            cuvAssert(scol == srow);
            cuvAssert(dcol == drow);
            m_memory->copy2d_from(m_ptr, src.ptr(), dpitch, spitch, srow, scol, OM(), stream);
        } else {
            throw std::runtime_error("copying of generic strides not implemented yet");
        }
    /**
     * Copy src into a linear memory block d: contiguous sources in a single
     * copy, 2d-copyable (pitched) sources via copy2d_from.
     */
    template<class OM, class OL>
    // ...
        if (/* source is contiguous */) {
            d.copy_from(src.ptr(), src.size(), OM(), stream);
        } else if (/* source is 2d-copyable */) {
            // ... (row/col/pitch describe the source)
            d.copy2d_from(src.ptr(), col, pitch, row, col, OM(), stream);
        } else {
            throw std::runtime_error("copying arbitrarily strided memory not implemented");
        }
    /**
     * Variant targeting pitched destination memory; requires an at least
     * 2-dimensional source.
     */
    template<class OM, class OL>
    // ...
        assert(src.ndim() >= 2);
        // ...
        d->set_strides(info().host_stride, info().host_shape, L());
        // ...
        if (/* source is 2d-copyable */) {
            d.copy2d_from(src, stream);
        } else {
            throw std::runtime_error("copying arbitrarily strided memory not implemented");
        }
};
/**
 * A view into an existing ndarray: it selects a (possibly strided) sub-range
 * and shares the underlying memory instead of copying it.
 */
template<class V, class M, class L = row_major>
class ndarray_view : public ndarray<V, M, L> {
public:
    /**
     * Assignment into a view copies the data into the viewed memory region.
     * Four overloads (combinations of value type, memory space and layout)
     * share the same pattern; a representative one is shown:
     */
    template<class _V, class _M, class _L>
    ndarray_view& operator=(const ndarray<_V, _M, _L>& o) {
        if (/* copying the memory failed */)
            throw std::runtime_error("copying ndarray to ndarray_view did not succeed. Maybe a shape mismatch?");
        return *this;
    }
    // ... (the three analogous overloads throw the same error)
    /**
     * Construct a view on ndarray o, selecting a sub-range per dimension via
     * an indices[...] expression. Negative start/finish values count
     * backwards from the end of the respective dimension; degenerate
     * (single-index) ranges drop their dimension from the view. (The
     * parameter order (o, idx) is assumed; the constructor below accepts the
     * reverse order.)
     */
    template<int D, int E>
    ndarray_view(const ndarray<V, M, L>& o, const index_gen<D, E>& idx) :
            ndarray<V, M, L>(o.m_allocator) {
        std::vector<int> shapes;
        std::vector<int> strides;
        cuvAssert(o.ndim() == D);
        for (size_t i = 0; i < D; i++) {
            int start = idx.ranges_[i].get_start(0);
            int finish = idx.ranges_[i].get_finish(o.shape(i));
            int stride = idx.ranges_[i].stride();
            if (start < 0)
                start += o.shape(i);
            if (finish < 0)
                finish += o.shape(i);
            cuvAssert(finish > start);
            // ...
            if (idx.ranges_[i].is_degenerate()) {
                // ... (dimension is dropped from the view)
            } else {
                shapes.push_back((finish - start) / stride);
                // ... (record the corresponding stride)
            }
        }
        // ... (adopt shapes/strides and o's memory)
    }
    /// the same view constructor with the argument order reversed
    template<int D, int E>
    ndarray_view(const index_gen<D, E>& idx, const ndarray<V, M, L>& o) :
            ndarray<V, M, L>(o.m_allocator) {
        // ... (range handling identical to the constructor above)
    }
};
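// Example (sketch):
//
//     using namespace cuv;
//     ndarray<float, host_memory_space> t(extents[8][6]);
//     ndarray_view<float, host_memory_space> row1(t, indices[1][index_range()]);
//     ndarray<float, host_memory_space> zeros(extents[6]);
//     row1 = zeros;   // writes through to the second row of t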
/// @return true iff both ndarrays have the same effective shape
/// (singleton dimensions are ignored)
template<class V, class V2, class M, class M2, class L>
bool equal_shape(const ndarray<V, M, L>& a, const ndarray<V2, M2, L>& b) {
    return a.effective_shape() == b.effective_shape();
}
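// Example (sketch): singleton dimensions are ignored, so a 4x1x5 ndarray
// compares shape-equal to a 4x5 one.
//
//     cuv::ndarray<float, cuv::host_memory_space> a(cuv::extents[4][1][5]);
//     cuv::ndarray<float, cuv::host_memory_space> b(cuv::extents[4][5]);
//     bool same = cuv::equal_shape(a, b);   // true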
/*
 * Type-switching helpers: build an ndarray type that differs from Mat in
 * exactly one template argument. (The struct bodies are elided in the
 * original; the struct names and the nested typedefs -- value_type,
 * memory_space_type, memory_layout_type -- are assumed here.)
 */
template<class Mat, class NewVT>
struct switch_value_type {
    typedef ndarray<NewVT, typename Mat::memory_space_type, typename Mat::memory_layout_type> type;
};

template<class Mat, class NewML>
struct switch_memory_layout_type {
    typedef ndarray<typename Mat::value_type, typename Mat::memory_space_type, NewML> type;
};

template<class Mat, class NewMS>
struct switch_memory_space_type {
    typedef ndarray<typename Mat::value_type, NewMS, typename Mat::memory_layout_type> type;
};

}  // namespace cuv
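// Example (sketch, relying on the assumed typedef names above):
//
//     typedef cuv::ndarray<float, cuv::dev_memory_space> dev_array;
//     typedef cuv::switch_value_type<dev_array, int>::type dev_int_array;
//     typedef cuv::switch_memory_space_type<dev_array, cuv::host_memory_space>::type host_array;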
using std::ostream;

/// print a host linear memory to a stream
template<class V>
ostream& operator<<(ostream& o, const cuv::linear_memory<V, cuv::host_memory_space>& t) {
    o << "[ ";
    for (unsigned int i = 0; i < t.size(); i++)
        o << t[i] << " ";
    return o << "]";
}

/// print a device linear memory to a stream (copies to the host first)
template<class V>
ostream& operator<<(ostream& o, const cuv::linear_memory<V, cuv::dev_memory_space>& t_) {
    cuv::linear_memory<V, cuv::host_memory_space> t = t_;   // device -> host copy
    o << "[ ";
    for (unsigned int i = 0; i < t.size(); i++)
        o << t[i] << " ";
    return o << "]";
}
/// print a host pitched memory to a stream, one row per line
template<class V>
ostream& operator<<(ostream& o, const cuv::pitched_memory<V, cuv::host_memory_space>& t) {
    o << "[ ";
    for (unsigned int i = 0; i < t.rows(); i++) {
        for (unsigned int j = 0; j < t.cols(); j++) {
            o << t(i, j) << " ";
        }
        if (i < t.rows() - 1)
            o << std::endl;
    }
    return o << "]";
}

/// print a device pitched memory to a stream (copies to the host first)
template<class V>
ostream& operator<<(ostream& o, const cuv::pitched_memory<V, cuv::dev_memory_space>& t_) {
    cuv::pitched_memory<V, cuv::host_memory_space> t = t_;   // device -> host copy
    o << "[ ";
    for (unsigned int i = 0; i < t.rows(); i++) {
        for (unsigned int j = 0; j < t.cols(); j++) {
            o << t(i, j) << " ";
        }
        if (i < t.rows() - 1)
            o << std::endl;
    }
    return o << "]";
}
/// print a device ndarray: copy it to the host, then use the host overload below
template<class V, class L>
ostream& operator<<(ostream& o, const cuv::ndarray<V, cuv::dev_memory_space, L>& t) {
    return o << cuv::ndarray<V, cuv::host_memory_space, L>(t);
}
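// Example (sketch): printing a device ndarray copies it to the host first.
//
//     cuv::ndarray<float, cuv::dev_memory_space> d(cuv::extents[2][3]);
//     std::cout << d << std::endl;   // formatted by the host overload below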
/// print a host ndarray of up to 3 dimensions
template<class V, class L>
ostream& operator<<(ostream& o, const cuv::ndarray<V, cuv::host_memory_space, L>& t) {
    if (t.ndim() == 1) {
        o << "[ ";
        for (unsigned int i = 0; i < t.shape(0); i++)
            o << t(i) << " ";
        return o << "]";
    }
    if (t.ndim() == 2) {
        o << "[";
        for (unsigned int i = 0; i < t.shape(0); ++i) {
            o << " [ ";
            for (unsigned int j = 0; j < t.shape(1); j++)
                o << t(i, j) << " ";
            o << "]";
            if (i != t.shape(0) - 1)
                o << std::endl;
        }
        return o << " ]";
    }
    if (t.ndim() == 3) {
        o << "[" << std::endl;
        for (unsigned int l = 0; l < t.shape(0); l++) {
            o << "[";
            for (unsigned int i = 0; i < t.shape(1); ++i) {
                o << " [ ";
                // 3-dimensional arrays are printed via linear indexing
                for (unsigned int j = 0; j < t.shape(2); j++)
                    o << t[l * t.shape(1) * t.shape(2) + i * t.shape(2) + j] << " ";
                o << "]";
                if (i != t.shape(1) - 1)
                    o << std::endl;
            }
            o << "]";
            if (l < t.shape(0) - 1)
                o << std::endl;
        }
        return o << "]";
    }
    throw std::runtime_error("printing of ndarrays with >3 dimensions not implemented");
}

#endif /* __CUV_NDARRAY_HPP__ */