ndarray.hpp
1 #ifndef __CUV_NDARRAY_HPP__
2 #define __CUV_NDARRAY_HPP__
3 
4 #include <boost/multi_array/extent_gen.hpp>
5 #include <boost/multi_array/index_gen.hpp>
6 #include <boost/shared_ptr.hpp>
7 #include <iostream>
8 #include <limits>
9 #include <numeric>
10 #include <stdexcept>
11 #include <vector>
12 
13 #include "allocators.hpp"
14 #include "memory.hpp"
15 #include "meta_programming.hpp"
16 
17 namespace cuv {
18 
22 static inline void cuvAssertFailed(const char *msg) {
23  throw std::runtime_error(std::string(msg));
24 }
25 
34 #define cuvAssert(X) \
35  if(!(X)){ cuv::cuvAssertFailed(#X); }
36 
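/**
 * Example (illustrative sketch): cuvAssert throws a std::runtime_error whose
 * message is the stringified expression, so it can be used in host code like:
 * @code
 * int n = 0;
 * cuvAssert(n == 0);      // passes, no effect
 * // cuvAssert(n == 1);   // would throw std::runtime_error("n == 1")
 * @endcode
 */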
37 using boost::detail::multi_array::extent_gen;
38 using boost::detail::multi_array::index_gen;
39 
49 typedef boost::detail::multi_array::index_range<boost::detail::multi_array::index, boost::detail::multi_array::size_type> index_range;
50 
54 typedef index_range::index index;
55 
56 #ifndef CUV_DONT_CREATE_EXTENTS_OBJ
57 
58 namespace {
69 extent_gen<0> extents;
70 
83 index_gen<0, 0> indices;
84 }
85 #endif
86 
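/**
 * Example (illustrative sketch): the global extents/indices objects are used in
 * Boost.MultiArray style to describe shapes and slices, e.g. with a float host ndarray:
 * @code
 * cuv::ndarray<float, cuv::host_memory_space> a(cuv::extents[3][4]);  // 3x4 array
 * cuv::ndarray_view<float, cuv::host_memory_space> row1 =
 *         a[cuv::indices[1][cuv::index_range()]];                     // 1-D view of row 1
 * @endcode
 */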
92 template<class V, class M, class L> class ndarray;
93 template<class V, class M, class L> class ndarray_view;
94 
96 template<class V, class M, class L>
97 void fill(ndarray<V, M, L>& v, const V& p);
98 
99 namespace detail {
100 
107 template<class index_type, class size_type>
108 void get_pitched_params(size_type& rows, size_type& cols, size_type& pitch,
109  const linear_memory<size_type, host_memory_space>& shape,
110  const linear_memory<index_type, host_memory_space>& stride, row_major) {
111  // strided dimension is the LAST one
112  rows = std::accumulate(shape[0].ptr, shape[0].ptr + shape.size() - 1, 1, std::multiplies<index_type>());
113  cols = shape[shape.size() - 1];
114  pitch = stride[shape.size() - 2];
115 }
116 
120 template<class index_type, class size_type>
121 void get_pitched_params(size_type& rows, size_type& cols, size_type& pitch,
122  const linear_memory<size_type, host_memory_space>& shape,
123  const linear_memory<index_type, host_memory_space>& stride, column_major) {
124  // strided dimension is the FIRST one
125  rows = std::accumulate(shape[0].ptr + 1, shape[0].ptr + shape.size(), 1, std::multiplies<index_type>());
126  cols = shape[0];
127  pitch = stride[1];
128 }
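/*
 * Worked example (illustrative): for a row-major array of shape {2, 3, 4} with
 * strides {12, 4, 1}, all leading dimensions are folded into the rows, i.e.
 * rows = 2*3 = 6, cols = 4 and pitch = stride[ndim-2] = 4.
 */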
129 
130 }
131 
135 template<class M, class L>
136 class ndarray_info {
137 
138 public:
139 
140  typedef unsigned int size_type;
141  typedef int index_type;
142  typedef M data_memory_space;
143 
144  boost::shared_ptr<allocator> m_allocator;
145 
148  linear_memory<size_type, host_memory_space> host_shape;    // shape of the ndarray as seen on the host
151  linear_memory<index_type, host_memory_space> host_stride;  // strides of the ndarray as seen on the host
154  linear_memory<size_type, host_memory_space> data_shape;    // shape of the underlying data allocation
157  linear_memory<index_type, host_memory_space> data_stride;  // strides of the underlying data allocation
159  ndarray_info(const boost::shared_ptr<allocator>& _allocator) :
160  m_allocator(_allocator), host_shape(_allocator), host_stride(_allocator),
161  data_shape(_allocator), data_stride(_allocator)
162  {
163  }
164 
166  size_type size() const {   // number of dimensions stored in this info object
167  return host_shape.size();
168  }
169 
171  ndarray_info(size_type s, const boost::shared_ptr<allocator>& _allocator) :
172  m_allocator(_allocator), host_shape(_allocator), host_stride(_allocator),
173  data_shape(_allocator), data_stride(_allocator)
174  {
175  resize(s);
176  }
177 
179  void resize(size_type s) {
180  host_shape.set_size(s);
181  host_stride.set_size(s);
182  }
183 
185  ndarray_info(const ndarray_info& o) :
186  m_allocator(o.m_allocator), host_shape(o.host_shape), host_stride(o.host_stride),
187  data_shape(m_allocator), data_stride(m_allocator)
188  {
189  }
190 
192  template<class OM>
193  ndarray_info(const ndarray_info<OM, L>& o) :
194  m_allocator(o.m_allocator), host_shape(o.host_shape), host_stride(o.host_stride),
195  data_shape(m_allocator), data_stride(m_allocator)
196  {
197  }
198 
199 };
200 
204 template<class V, class M, class L = row_major>
205 class ndarray {
206 
207 public:
208 
209  typedef memory<V, M> memory_type;                 // type of the underlying memory container
210  typedef V value_type;                             // element type
211  typedef M memory_space_type;                      // host_memory_space or dev_memory_space
212  typedef unsigned int size_type;                   // type used for shapes and sizes
213  typedef int index_type;                           // type used for indices and strides
214  typedef typename memory_type::reference_type reference_type;              // element reference (assumed to come from memory.hpp)
215  typedef typename memory_type::const_reference_type const_reference_type;  // const element reference (assumed)
216  typedef L memory_layout_type;                     // row_major or column_major
217 
218  typedef ndarray_info<M, L> info_type;             // shape/stride bookkeeping
220 
221 public:
222  boost::shared_ptr<allocator> m_allocator;
223 
224 private:
225  void check_size_limit(size_t size) const {
226  if (size > static_cast<size_t>(std::numeric_limits<index_type>::max())) {
227  throw std::runtime_error("maximum ndarray size exceeded");
228  }
229  }
230 
232  template<class _V, class _M, class _L>
233  friend class ndarray_view;
234 
235 protected:
236 
238  info_type m_info;                                 // shape and stride information
239 
241  boost::shared_ptr<memory_type> m_memory;
242 
244  V* m_ptr;
245 
258  size_type index_of(int D, index_type* arr) const {
259  index_type pos = 0;
260  for (int i = 0; i < D; i++) {
261  index_type temp = arr[i];
262  if (temp < 0)
263  temp = m_info.host_shape[i] + temp;
264  pos += temp * m_info.host_stride[i];
265  }
266  return pos;
267  }
268 
274  void allocate(ndarray& t, linear_memory_tag) {     // allocate contiguous (linear) memory for t
275  linear_memory<V, M> mem(t.size(), t.m_allocator);
276  mem.set_strides(t.m_info.host_stride, t.m_info.host_shape, L());
277  t.m_ptr = mem.ptr();
278  t.m_memory.reset(new memory<V, M>(mem.release(), mem.size(), t.m_allocator));
279  }
280 
286  void allocate(ndarray& t, pitched_memory_tag) {    // allocate 2D-pitched memory for t
287  typename ndarray<V, M, L>::size_type row, col, pitch;
288  detail::get_pitched_params(row, col, pitch, t.m_info.host_shape, t.m_info.host_stride, L());
289  pitched_memory<V, M> d(row, col);
290  d.set_strides(t.m_info.host_stride, t.m_info.host_shape, L());
291  t.m_ptr = d.ptr();
292  t.m_memory.reset(new memory<V, M>(d.release(), d.size(), t.m_allocator));
293  }
294 
295 public:
296 
309  template<size_t D>
310  size_type index_of(const extent_gen<D>& eg) const {
311  index_type pos = 0;
312  for (size_t i = 0; i < D; i++) {
313  index_type temp = eg.ranges_[i].finish();
314  if (temp < 0)
315  temp = m_info.host_shape[i] + temp;
316  pos += temp * m_info.host_stride[i];
317  }
318  return pos;
319  }
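/**
 * Example (illustrative sketch): index_of() maps a multi-dimensional index
 * written with extents to a flat offset using the host strides; for a
 * row-major 3x4 array the strides are {4, 1}, so (2,1) maps to 2*4 + 1 = 9:
 * @code
 * cuv::ndarray<float, cuv::host_memory_space> a(cuv::extents[3][4]);
 * unsigned int off = a.index_of(cuv::extents[2][1]);   // 9
 * @endcode
 */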
320 
325 
326  index_type ndim() const {
327  return m_info.host_shape.size();
328  }
329 
333  size_type shape(const size_t i) const {
334  return m_info.host_shape[i];
335  }
336 
340  index_type stride(const size_t i) const {
341  return m_info.host_stride[i];
342  }
343 
345  V* ptr() {
346  return m_ptr;
347  }
348 
353  const V* ptr() const {
354  return m_ptr;
355  }
356 
358  void set_ptr_offset(long int i) {
359  m_ptr = m_memory->ptr() + i;
360  }
361 
363  boost::shared_ptr<memory_type>& mem() {
364  return m_memory;
365  }
370  const boost::shared_ptr<memory_type>& mem() const {
371  return m_memory;
372  }
373 
376  size_type size() const {
377  size_t size = std::accumulate(m_info.host_shape[0].ptr, m_info.host_shape[0].ptr + m_info.host_shape.size(), 1,
378  std::multiplies<size_t>());
379 
380  check_size_limit(size);
381 
382  return static_cast<size_type>(size);
383  }
384 
392  size_type memsize() const {
393 #ifndef NDEBUG
394  cuvAssert(is_c_contiguous());
395 #endif
396  size_t size = std::accumulate(m_info.host_shape[0].ptr, m_info.host_shape[0].ptr + m_info.host_shape.size(), 1,
397  std::multiplies<size_type>());
398 
399  check_size_limit(size);
400 
401  return static_cast<size_type>(size);
402  }
403 
405  std::vector<size_type> shape() const {
406  if (ndim() == 0)
407  return std::vector<size_type>();
408  return std::vector<size_type>(m_info.host_shape[0].ptr, m_info.host_shape[0].ptr + m_info.host_shape.size());
409  }
410 
416  std::vector<size_type> effective_shape() const {
417  std::vector<size_type> shape;
418  shape.reserve(ndim());
419  if (ndim() == 0)
420  return shape;
421  std::remove_copy_if(m_info.host_shape[0].ptr, m_info.host_shape[0].ptr + m_info.host_shape.size(),
422  std::back_inserter(shape), std::bind2nd(std::equal_to<size_type>(), 1));
423  return shape;
424  }
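/**
 * Example (illustrative sketch): shape() keeps singleton dimensions,
 * effective_shape() drops them:
 * @code
 * cuv::ndarray<float, cuv::host_memory_space> a(cuv::extents[1][5][1]);
 * a.shape();             // {1, 5, 1}
 * a.effective_shape();   // {5}
 * @endcode
 */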
425 
427  const info_type& info() const {
428  return m_info;
429  }
430 
432  info_type& info() {
433  return m_info;
434  }
435 
437  bool is_c_contiguous() const {
439  }
440 
442  bool is_2dcopyable() const {
444  }
445  // accessors
455  reference_type operator[](index_type idx) {         // "flat" access as if the memory were linear
456  size_type ndim = this->ndim();
457  size_type* virtualstride = new size_type[ndim];
458  size_type pos = 0;
459  if (IsSame<L, row_major>::Result::value) {   // layout dispatch via meta_programming.hpp
460  // row major
461  {
462  size_type virt_size = 1;
463  for (int i = ndim - 1; i >= 0; --i) {
464  virtualstride[i] = virt_size;
465  virt_size *= m_info.host_shape[i];
466  }
467  }
468  for (size_type i = 0; i < ndim; ++i) {
469  pos += (idx / virtualstride[i]) * m_info.host_stride[i];
470  idx -= (idx / virtualstride[i]) * virtualstride[i];
471  }
472  } else {
473  // column major
474  {
475  size_type virt_size = 1;
476  for (unsigned int i = 0; i < ndim; ++i) {
477  virtualstride[i] = virt_size;
478  virt_size *= m_info.host_shape[i];
479  }
480  }
481  for (int i = ndim - 1; i >= 0; --i) {
482  pos += (idx / virtualstride[i]) * m_info.host_stride[i];
483  idx -= (idx / virtualstride[i]) * virtualstride[i];
484  }
485  }
486  delete[] virtualstride;
487  return reference_type(m_ptr + pos);
488  }
489 
491  const_reference_type operator[](index_type idx) const {
492  return const_cast<ndarray&>(*this)[idx];
493  }
494 
500  reference_type operator()(index_type i0) {
501 #ifndef NDEBUG
502  cuvAssert(ndim()==1);
503  cuvAssert((i0>=0 && (size_type)i0 < shape(0)) || (i0<0 && (size_type)(-i0)<shape(0)+1))
504 #endif
505  if (i0 >= 0) {
506  return reference_type(m_ptr + i0);
507  } else {
508  return reference_type(m_ptr + shape(0) + i0);   // i0 < 0: count from the end
509  }
510  }
511 
513  const_reference_type operator()(index_type i0) const {
514  return const_cast<ndarray&>(*this)(i0);
515  }
516 
518  const_reference_type operator()(index_type i0, index_type i1) const {
519  return const_cast<ndarray&>(*this)(i0, i1);
520  }
521 
523  reference_type operator()(index_type i0, index_type i1) {
524 #ifndef NDEBUG
525  cuvAssert(ndim()==2);
526  cuvAssert((i0>=0 && (size_type)i0 < shape(0)) || (i0<0 && (size_type)(-i0)<shape(0)+1))
527  cuvAssert((i1>=0 && (size_type)i1 < shape(1)) || (i1<0 && (size_type)(-i1)<shape(1)+1))
528 #endif
529  index_type arr[2] = { i0, i1 };
530  return reference_type(m_ptr + index_of(2, arr));
531  }
532 
534  const_reference_type operator()(index_type i0, index_type i1, index_type i2) const {
535  return const_cast<ndarray&>(*this)(i0, i1, i2);
536  }
537 
539  reference_type operator()(index_type i0, index_type i1, index_type i2) {
540 #ifndef NDEBUG
541  cuvAssert(ndim()==3);
542  cuvAssert((i0>=0 && (size_type)i0 < shape(0)) || (i0<0 && (size_type)-i0<shape(0)+1))
543  cuvAssert((i1>=0 && (size_type)i1 < shape(1)) || (i1<0 && (size_type)-i1<shape(1)+1))
544  cuvAssert((i2>=0 && (size_type)i2 < shape(2)) || (i2<0 && (size_type)-i2<shape(2)+1))
545 #endif
546  index_type arr[3] = { i0, i1, i2 };
547  return reference_type(m_ptr + index_of(3, arr));
548  }
549 
551  const_reference_type operator()(index_type i0, index_type i1, index_type i2, index_type i3) const {
552  return const_cast<ndarray&>(*this)(i0, i1, i2, i3);
553  }
554 
556  reference_type operator()(index_type i0, index_type i1, index_type i2, index_type i3) {
557 #ifndef NDEBUG
558  cuvAssert(ndim()==4);
559  cuvAssert((i0>=0 && (size_type)i0 < shape(0)) || (i0<0 && (size_type)-i0<shape(0)+1))
560  cuvAssert((i1>=0 && (size_type)i1 < shape(1)) || (i1<0 && (size_type)-i1<shape(1)+1))
561  cuvAssert((i2>=0 && (size_type)i2 < shape(2)) || (i2<0 && (size_type)-i2<shape(2)+1))
562  cuvAssert((i3>=0 && (size_type)i3 < shape(3)) || (i3<0 && (size_type)-i3<shape(3)+1))
563 #endif
564  index_type arr[4] = { i0, i1, i2, i3 };
565  return reference_type(m_ptr + index_of(4, arr));
566  }
567 
569  const_reference_type operator()(index_type i0, index_type i1, index_type i2, index_type i3,
570  index_type i4) const {
571  return const_cast<ndarray&>(*this)(i0, i1, i2, i3, i4);
572  }
573 
575  reference_type operator()(index_type i0, index_type i1, index_type i2, index_type i3, index_type i4) {
576 #ifndef NDEBUG
577  cuvAssert(ndim()==5);
578  cuvAssert((i0>=0 && (size_type)i0 < shape(0)) || (i0<0 && (size_type)-i0<shape(0)+1))
579  cuvAssert((i1>=0 && (size_type)i1 < shape(1)) || (i1<0 && (size_type)-i1<shape(1)+1))
580  cuvAssert((i2>=0 && (size_type)i2 < shape(2)) || (i2<0 && (size_type)-i2<shape(2)+1))
581  cuvAssert((i3>=0 && (size_type)i3 < shape(3)) || (i3<0 && (size_type)-i3<shape(3)+1))
582  cuvAssert((i4>=0 && (size_type)i4 < shape(4)) || (i4<0 && (size_type)-i4<shape(4)+1))
583 #endif
584  index_type arr[5] = { i0, i1, i2, i3, i4 };
585  return reference_type(m_ptr + index_of(5, arr));
586  }
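/**
 * Example (illustrative sketch): element access with operator(), where negative
 * indices count backwards from the end of the corresponding dimension:
 * @code
 * cuv::ndarray<float, cuv::host_memory_space> a(cuv::extents[3][4]);
 * a(0, 0) = 1.f;     // first element
 * a(2, 3) = 2.f;     // last element
 * a(-1, -1) = 3.f;   // same element as a(2, 3)
 * @endcode
 */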
587  // accessing stored values
596  ndarray(const boost::shared_ptr<allocator> _allocator = boost::make_shared<default_allocator>()) :
597  m_allocator(_allocator), m_info(_allocator), m_ptr(NULL) {
598  }
599 
600  // ****************************************************************
601  // Constructing from other ndarray
602  // ****************************************************************
603 
609  ndarray(const ndarray& o) :
610  m_allocator(o.m_allocator),
611  m_info(o.m_info), // copy only shape
612  m_memory(o.m_memory), // increase ref counter
613  m_ptr(o.m_ptr) {
614  } // same pointer in memory
615 
620  template<class OM>
621  ndarray(const ndarray<value_type, OM, L>& o, cudaStream_t stream = 0) :
622  m_allocator(o.m_allocator),
623  m_info(o.info()), // primarily to copy shape
624  m_ptr(NULL) {
625  copy_memory(o, linear_memory_tag(), stream);
626  m_ptr = m_memory->ptr();
627  }
628 
633  explicit ndarray(const ndarray& o, pitched_memory_tag, cudaStream_t stream = 0) :
634  m_allocator(o.m_allocator),
635  m_info(o.m_info), // primarily to copy shape
636  m_ptr(NULL) {
637  copy_memory(o, pitched_memory_tag(), stream);
638  m_ptr = m_memory->ptr();
639  }
640 
645  template<class OM>
646  explicit ndarray(const ndarray<value_type, OM, L>& o, pitched_memory_tag, cudaStream_t stream = 0) :
647  m_allocator(o.m_allocator),
648  m_info(o.info()), // primarily to copy shape
649  m_ptr(NULL) {
650  copy_memory(o, pitched_memory_tag(), stream);
651  m_ptr = m_memory->ptr();
652  }
653 
658  explicit ndarray(const ndarray& o, linear_memory_tag, cudaStream_t stream = 0) :
659  m_allocator(o.m_allocator),
660  m_info(o.m_info), // primarily to copy shape
661  m_ptr(NULL) {
662  copy_memory(o, linear_memory_tag(), stream);
663  m_ptr = m_memory->ptr();
664  }
665 
670  template<class OM>
671  explicit ndarray(const ndarray<value_type, OM, L>& o, linear_memory_tag, cudaStream_t stream = 0) :
672  m_allocator(o.m_allocator),
673  m_info(o.info()), // primarily to copy shape
674  m_ptr(NULL) {
675  copy_memory(o, linear_memory_tag(), stream);
676  m_ptr = m_memory->ptr();
677  }
678 
685  template<class OL>
686  explicit ndarray(const ndarray<value_type, M, OL>& o) :
687  m_allocator(o.m_allocator),
688  m_info(o.m_allocator),
689  m_memory(o.mem()), // increase ref counter
690  m_ptr(const_cast<V*>(o.ptr())) { // same pointer in memory
691  m_info.host_shape = o.info().host_shape;
692  m_info.host_shape.reverse();       // the other layout lists dimensions in reverse order
693  m_info.host_stride = o.info().host_stride;
694  m_info.host_stride.reverse();
695  }
696 
697  // ****************************************************************
698  // Constructing from SHAPE
699  // ****************************************************************
700 
704  explicit ndarray(const size_type i,
705  const boost::shared_ptr<allocator> _allocator = boost::make_shared<default_allocator>()) :
706  m_allocator(_allocator),
707  m_info(_allocator),
708  m_ptr(NULL) {
709  m_info.resize(1);
710  m_info.host_shape[0] = i;
711  allocate(*this, linear_memory_tag());
712  }
713 
717  explicit ndarray(const size_type i, const int j, const boost::shared_ptr<allocator> _allocator =
718  boost::make_shared<default_allocator>()) :
719  m_allocator(_allocator),
720  m_info(_allocator),
721  m_ptr(NULL) {
722  m_info.resize(2);
723  m_info.host_shape[0] = i;
724  m_info.host_shape[1] = j;
725  allocate(*this, linear_memory_tag());
726  }
727 
731  template<size_t D>
732  explicit ndarray(const extent_gen<D>& eg,
733  const boost::shared_ptr<allocator> _allocator = boost::make_shared<default_allocator>()) :
734  m_allocator(_allocator),
735  m_info(_allocator),
736  m_ptr(NULL) {
737  m_info.resize(D);
738  for (size_t i = 0; i < D; i++)
739  m_info.host_shape[i] = eg.ranges_[i].finish();
740  allocate(*this, linear_memory_tag());
741  }
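/**
 * Example (illustrative sketch): constructing from extents allocates contiguous
 * (linear) memory in the requested memory space:
 * @code
 * cuv::ndarray<float, cuv::host_memory_space> host_arr(cuv::extents[10][20]);
 * cuv::ndarray<float, cuv::dev_memory_space>  dev_arr(cuv::extents[10][20]);
 * @endcode
 */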
742 
748  explicit ndarray(const std::vector<size_type>& eg,
749  const boost::shared_ptr<allocator> _allocator = boost::make_shared<default_allocator>()) :
750  m_allocator(_allocator),
751  m_info(_allocator),
752  m_ptr(NULL) {
753  m_info.resize(eg.size());
754  for (size_t i = 0; i < eg.size(); i++)
755  m_info.host_shape[i] = eg[i];
756  allocate(*this, linear_memory_tag());
757  }
758 
764  explicit ndarray(const std::vector<size_type>& eg, pitched_memory_tag,
765  const boost::shared_ptr<allocator> _allocator = boost::make_shared<default_allocator>()) :
766  m_allocator(_allocator),
767  m_info(_allocator),
768  m_ptr(NULL) {
769  m_info.resize(eg.size());
770  for (size_t i = 0; i < eg.size(); i++)
771  m_info.host_shape[i] = eg[i];
772  allocate(*this, pitched_memory_tag());
773  }
774 
778  template<size_t D>
779  explicit ndarray(const extent_gen<D>& eg, pitched_memory_tag, const boost::shared_ptr<allocator> _allocator =
780  boost::make_shared<default_allocator>()) :
781  m_allocator(_allocator),
782  m_info(_allocator),
783  m_ptr(NULL) {
784  m_info.resize(D);
785  for (size_t i = 0; i < D; i++)
786  m_info.host_shape[i] = eg.ranges_[i].finish();
787  allocate(*this, pitched_memory_tag());
788  }
789 
790  // ****************************************************************
791  // Constructing from shape and raw pointer
792  // ****************************************************************
793 
799  template<size_t D>
800  explicit ndarray(const extent_gen<D>& eg, value_type* ptr, const boost::shared_ptr<allocator> _allocator =
801  boost::make_shared<default_allocator>()) :
802  m_allocator(_allocator),
803  m_info(_allocator),
804  m_ptr(ptr) {
805  m_info.resize(D);
806  size_t size = 1;
807  if (IsSame<L, row_major>::Result::value) {
808  for (int i = D - 1; i >= 0; i--) {
809  m_info.host_shape[i] = eg.ranges_[i].finish();
810  m_info.host_stride[i] = size;
811  size *= eg.ranges_[i].finish();
812  }
813  } else {
814  for (size_t i = 0; i < D; i++) {
815  m_info.host_shape[i] = eg.ranges_[i].finish();
816  m_info.host_stride[i] = size;
817  size *= eg.ranges_[i].finish();
818  }
819  }
820  m_memory.reset(new memory<V, M>(ptr, size, m_allocator, false));
821  }
822 
823  explicit ndarray(const std::vector<size_type>& shape, value_type* ptr,
824  const boost::shared_ptr<allocator> _allocator = boost::make_shared<default_allocator>()) :
825  m_allocator(_allocator),
826  m_info(_allocator),
827  m_ptr(ptr) {
828  unsigned int D = shape.size();
829  m_info.resize(D);
830  size_type size = 1;
831  if (IsSame<L, row_major>::Result::value)
832  for (int i = D - 1; i >= 0; i--) {
833  m_info.host_shape[i] = shape[i];
834  m_info.host_stride[i] = size;
835  size *= shape[i];
836  }
837  else
838  for (size_t i = 0; i < D; i++) {
839  m_info.host_shape[i] = shape[i];
840  m_info.host_stride[i] = size;
841  size *= shape[i];
842  }
843  }
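/**
 * Example (illustrative sketch): an ndarray can wrap memory it does not own,
 * e.g. the buffer of a std::vector; no copy is made, so the buffer must outlive
 * the ndarray:
 * @code
 * std::vector<float> buf(6, 0.f);
 * std::vector<unsigned int> shape;               // ndarray::size_type is unsigned int
 * shape.push_back(2); shape.push_back(3);
 * cuv::ndarray<float, cuv::host_memory_space> wrapped(shape, &buf[0]);
 * wrapped(1, 2) = 42.f;                          // writes buf[5]
 * @endcode
 */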
850  template<int D, int E>
851  explicit ndarray(const index_gen<D, E>& idx, value_type* ptr, const boost::shared_ptr<allocator> _allocator =
852  boost::make_shared<default_allocator>()) :
853  m_allocator(_allocator),
854  m_info(_allocator),
855  m_ptr(ptr) {
856  m_info.resize(D);
857  size_type size = 1;
858  if (IsSame<L, row_major>::Result::value)
859  for (int i = D - 1; i >= 0; i--) {
860  m_info.host_shape[i] = idx.ranges_[i].finish();
861  m_info.host_stride[i] = size;
862  size *= idx.ranges_[i].finish();
863  }
864  else
865  for (size_t i = 0; i < D; i++) {
866  m_info.host_shape[i] = idx.ranges_[i].finish();
867  m_info.host_stride[i] = size;
868  size *= idx.ranges_[i].finish();
869  }
870  }
871  // @} // constructors
872 
873  // ****************************************************************
874  // assignment operators (try not to reallocate if shapes match)
875  // ****************************************************************
876 
885  template<class _M, class _L>
886  ndarray& assign(const ndarray<V, _M, _L>& o, cudaStream_t stream = 0) {
887  if (!copy_memory(o, false, stream))
888  throw std::runtime_error("copying ndarray did not succeed. Maybe a shape mismatch?");
889  return *this;
890  }
891 
897  ndarray& operator=(const ndarray& o) {
898  if (this == &o)
899  return *this; // check for self-assignment
900 
901  // TODO make use of copy-and-swap idiom
902  m_memory = o.mem();
903  m_ptr = const_cast<V*>(o.ptr());
904  m_info = o.info();
905  return *this;
906  }
907 
911  template<class _V>
912  typename boost::enable_if_c<boost::is_convertible<_V, value_type>::value, ndarray&>::type operator=(
913  const _V& scalar) {
914  fill(*this, scalar);
915  return *this;
916  }
917 
925  template<class OM>
926  ndarray& assign(const ndarray<value_type, OM, L>& o, cudaStream_t stream = 0) {
927  if (!copy_memory(o, false, stream))
928  copy_memory(o, linear_memory_tag(), stream);
929  if (mem())
930  // if mem() does not exist, we're just wrapping a pointer
931  // of a std::vector or so -> simply keep it
932  m_ptr = mem()->ptr();
933  return *this;
934  }
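/**
 * Example (illustrative sketch): assignment across memory spaces copies the
 * data (host <-> device) and reuses the destination memory if the shapes match:
 * @code
 * cuv::ndarray<float, cuv::host_memory_space> h(cuv::extents[4][5]);
 * cuv::ndarray<float, cuv::dev_memory_space>  d(cuv::extents[4][5]);
 * d = h;   // host -> device copy
 * h = d;   // device -> host copy
 * @endcode
 */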
935 
943  template<class OM>
944  ndarray& operator=(const ndarray<value_type, OM, L>& o) {
945  return assign(o);
946  }
947 
953  template<class OL>
954  ndarray& operator=(const ndarray<value_type, M, OL>& o) {
955  return assign(o);
956  }
957  // assignment
962  template<class T>
963  ndarray copy(T tag = linear_memory_tag(), cudaStream_t stream = 0) const {
964  ndarray t(m_allocator);
965  const ndarray& o = *this;
966  t.m_info = o.info();
967  t.copy_memory(o, tag, stream);
968  t.m_ptr = t.mem()->ptr();
969  return t;
970  }
971 
975  ndarray copy() const {
976  return copy(linear_memory_tag());
977  }
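/**
 * Example (illustrative sketch): copy() creates a deep copy with its own
 * memory, whereas operator= within the same memory space only shares the
 * underlying buffer:
 * @code
 * cuv::ndarray<float, cuv::host_memory_space> a(cuv::extents[2][3]);
 * cuv::ndarray<float, cuv::host_memory_space> shared = a;          // same memory
 * cuv::ndarray<float, cuv::host_memory_space> deep   = a.copy();   // new memory
 * @endcode
 */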
978 
984  template<int D, int E>
985  ndarray_view<V, M, L> operator[](const index_gen<D, E>& idx) const {
986 
987  ndarray_view<V, M, L> t(m_allocator);
988  const ndarray& o = *this;
989  t.m_memory = o.mem();
990  t.m_ptr = const_cast<V*>(o.ptr());
991 
992  std::vector<int> shapes;
993  std::vector<int> strides;
994  shapes.reserve(D);
995  strides.reserve(D);
996  cuvAssert(o.ndim()==D);
997 
998  for (size_t i = 0; i < D; i++) {
999  int start = idx.ranges_[i].get_start(0);
1000  int finish = idx.ranges_[i].get_finish(o.shape(i));
1001  int stride = idx.ranges_[i].stride();
1002  if (start < 0)
1003  start += o.shape(i);
1004  if (finish < 0)
1005  finish += o.shape(i);
1006 #ifndef NDEBUG
1007  cuvAssert(finish>start);
1008 #endif
1009  t.m_ptr += start * o.stride(i);
1010  if (idx.ranges_[i].is_degenerate()) {
1011  // skip dimension
1012  } else {
1013  shapes.push_back((finish - start) / stride);
1014  strides.push_back(o.stride(i) * stride);
1015  }
1016  }
1017 
1018  // store in m_info
1019  t.m_info.resize(shapes.size());
1020 
1021  std::copy(shapes.begin(), shapes.end(), t.m_info.host_shape[0].ptr);
1022  std::copy(strides.begin(), strides.end(), t.m_info.host_stride[0].ptr);
1023  return t; // should not copy mem, only m_info
1024  }
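/**
 * Example (illustrative sketch): operator[] with an index_gen selects a range
 * in every dimension and returns a view that shares memory with the original:
 * @code
 * cuv::ndarray<float, cuv::host_memory_space> a(cuv::extents[4][6]);
 * cuv::ndarray_view<float, cuv::host_memory_space> block =
 *         a[cuv::indices[cuv::index_range(1, 3)][cuv::index_range(0, 4)]];
 * // block has shape {2, 4}; writing to block modifies a
 * @endcode
 */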
1025 
1033  template<size_t D>
1034  void reshape(const extent_gen<D>& eg) {
1035  std::vector<size_type> shape(D);
1036  for (size_t i = 0; i < D; i++)
1037  shape[i] = eg.ranges_[i].finish();
1038  reshape(shape);
1039  }
1047  void reshape(const std::vector<size_type>& shape) {
1048  size_type new_size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_type>());
1049  if (!is_c_contiguous())
1050  throw std::runtime_error("cannot reshape: ndarray is not c_contiguous");
1051  if (size() != new_size)
1052  throw std::runtime_error("cannot reshape: products do not match");
1053  m_info.resize(shape.size());
1054  size_type size = 1;
1055  if (IsSame<L, row_major>::Result::value)
1056  for (int i = shape.size() - 1; i >= 0; i--) {
1057  m_info.host_shape[i] = shape[i];
1058  m_info.host_stride[i] = size;
1059  size *= shape[i];
1060  }
1061  else
1062  for (size_t i = 0; i < shape.size(); i++) {
1063  m_info.host_shape[i] = shape[i];
1064  m_info.host_stride[i] = size;
1065  size *= shape[i];
1066  }
1067  }
1073  void reshape(size_type r, size_type c) {            // convenience overload: reshape to r x c
1074  reshape(extents[r][c]);
1075  }
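/**
 * Example (illustrative sketch): reshape() changes the shape of a contiguous
 * ndarray without copying, as long as the element count stays the same:
 * @code
 * cuv::ndarray<float, cuv::host_memory_space> a(cuv::extents[2][6]);
 * a.reshape(cuv::extents[3][4]);   // still 12 elements, no reallocation
 * a.reshape(4, 3);                 // two-argument convenience overload
 * @endcode
 */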
1076 
1082  void resize(const std::vector<size_type>& shape) {
1083  if (ndim() != 0) {
1084  size_type new_size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_type>());
1085  if (is_c_contiguous() && size() == new_size) {
1086  reshape(shape);
1087  return;
1088  }
1089  }
1090 
1091  // free memory before we allocate new memory (important if pooling is active)
1092  m_memory.reset(new memory<V, M>(0, 0, m_allocator));
1093  *this = ndarray(shape, m_allocator);
1094  }
1102  template<size_t D>
1103  void resize(const extent_gen<D>& eg) {
1104  std::vector<size_type> shape(D);
1105  for (size_t i = 0; i < D; i++)
1106  shape[i] = eg.ranges_[i].finish();
1107  resize(shape);
1108  }
1109 
1114  void resize(size_type size) {
1115  resize(extents[size]);
1116  }
1117 
1123  void resize(size_type r, size_type c) {             // convenience overload: resize to r x c
1124  resize(extents[r][c]);
1125  }
1126 
1130  void dealloc() {
1131  m_memory.reset();
1132  m_ptr = NULL;
1133  m_info.resize(0);                                   // ndim() is 0 after dealloc()
1134  }
1135 
1137  template<class OM, class OL>
1138  bool copy_memory(const ndarray<V, OM, OL>& src, bool force_dst_contiguous, cudaStream_t stream) {
1139  if (effective_shape() != src.effective_shape() || !ptr()) {
1140  return false;
1141  }
1142 
1143  assert(m_memory.get());
1144  // ATTENTION: m_ptr might be different than m_memory->ptr()!
1145 
1146  // TODO: this could be probably implemented in the memory classes as well
1147 
1148  if (is_c_contiguous() && src.is_c_contiguous()) {
1149  // can copy w/o bothering about m_memory
1150  m_memory->copy_from(m_ptr, src.ptr(), src.size(), OM(), stream);
1151  } else if (is_c_contiguous() && src.is_2dcopyable()) {
1152  size_type row, col, pitch;
1153  detail::get_pitched_params(row, col, pitch, src.info().host_shape, src.info().host_stride, OL());
1154  m_memory->copy2d_from(m_ptr, src.ptr(), col, pitch, row, col, OM(), stream);
1155  } else if (!force_dst_contiguous && is_2dcopyable() && src.is_c_contiguous()) {
1156  size_type row, col, pitch;
1157  detail::get_pitched_params(row, col, pitch, info().host_shape, info().host_stride, L());
1158  m_memory->copy2d_from(m_ptr, src.ptr(), pitch, col, row, col, OM(), stream);
1159  } else if (!force_dst_contiguous && is_2dcopyable() && src.is_2dcopyable()) {
1160  size_type srow, scol, spitch;
1161  size_type drow, dcol, dpitch;
1162  detail::get_pitched_params(drow, dcol, dpitch, info().host_shape, info().host_stride, L());
1163  detail::get_pitched_params(srow, scol, spitch, src.info().host_shape, src.info().host_stride, OL());
1164  cuvAssert(scol==dcol);
1165  cuvAssert(srow==drow);
1166  m_memory->copy2d_from(m_ptr, src.ptr(), dpitch, spitch, srow, scol, OM(), stream);
1167  } else {
1168  throw std::runtime_error("copying of generic strides not implemented yet");
1169  }
1170 
1171  if (!IsSame<L, OL>::Result::value) {
1172  info().host_stride.reverse();
1173  info().host_shape.reverse();
1174  }
1175  return true;
1176  }
1177 
1179  template<class OM, class OL>
1180  void copy_memory(const ndarray<V, OM, OL>& src, linear_memory_tag, cudaStream_t stream) {
1181  if (copy_memory(src, true, stream)) // destination must be contiguous
1182  return;
1183  info().resize(src.ndim());
1184  info().host_shape = src.info().host_shape;
1185 
1186  // free old memory
1187  m_memory.reset(new memory<V, M>(m_allocator));
1188 
1189  linear_memory<V, M> d(src.size(), m_allocator);
1190  d.set_strides(info().host_stride, info().host_shape, L());
1191  if (src.is_c_contiguous()) {
1192  // easiest case: both linear, simply copy
1193  d.copy_from(src.ptr(), src.size(), OM(), stream);
1194  } else if (src.is_2dcopyable()) {
1195  // other memory is probably a pitched memory or some view onto an array
1196  size_type row, col, pitch;
1197  detail::get_pitched_params(row, col, pitch, src.info().host_shape, src.info().host_stride, OL());
1198  d.copy2d_from(src.ptr(), col, pitch, row, col, OM(), stream);
1199  } else {
1200  throw std::runtime_error("copying arbitrarily strided memory not implemented");
1201  }
1202  mem().reset(new memory<V, M>(d.release(), d.size(), m_allocator));
1203  if (!IsSame<L, OL>::Result::value) {
1204  info().host_stride.reverse();
1205  info().host_shape.reverse();
1206  }
1207  }
1208 
1210  template<class OM, class OL>
1211  void copy_memory(const ndarray<V, OM, OL>& src, pitched_memory_tag, cudaStream_t stream) {
1212  assert(src.ndim()>=2);
1213  if (copy_memory(src, false, stream)) // destination need not be contiguous
1214  return;
1215  info().resize(src.ndim());
1216  info().host_shape = src.info().host_shape;
1217  size_type row, col, pitch;
1218  detail::get_pitched_params(row, col, pitch, src.info().host_shape, src.info().host_stride, OL());
1219  pitched_memory<V, M> d(row, col);
1220  //dst.mem().reset(d);
1221  d.set_strides(info().host_stride, info().host_shape, L());   // d is an object, not a pointer
1222  if (src.is_2dcopyable()) {
1223  // other memory is probably a pitched memory or some view onto an array
1224  detail::get_pitched_params(row, col, pitch, src.info().host_shape, src.info().host_stride, OL());
1225  d.copy2d_from(src, stream);
1226  } else {
1227  throw std::runtime_error("copying arbitrarily strided memory not implemented");
1228  }
1229  mem().reset(new memory<V, M>(d.release(), d.size(), m_allocator));
1230 
1231  if (!IsSame<L, OL>::Result::value) {
1232  info().host_stride.reverse();
1233  info().host_shape.reverse();
1234  }
1235  }
1236 
1237 };
1238 
1242 template<class V, class M, class L = row_major>
1243 class ndarray_view: public ndarray<V, M, L>
1244 {
1245 private:
1246  typedef ndarray<V, M, L> super;
1247  using super::m_memory;
1248  using super::m_ptr;
1249  using super::m_info;
1250 
1251  template<class _V, class _M, class _L>
1252  friend class ndarray;
1253 
1254 public:
1255 
1257  ndarray_view(const boost::shared_ptr<allocator>& allocator) :
1258  ndarray<V, M, L>(allocator) {
1259  }
1260 
1264  ndarray_view& assign(const ndarray<V, M, L>& o, cudaStream_t stream = 0) {
1265  if (!this->copy_memory(o, false, stream))
1266  throw std::runtime_error("copying ndarray to ndarray_view did not succeed. Maybe a shape mismatch?");
1267  return *this;
1268  }
1269 
1273  ndarray_view& assign(const ndarray_view<V, M, L>& o, cudaStream_t stream = 0) {
1274  if (!this->copy_memory(o, false, stream))
1275  throw std::runtime_error("copying ndarray to ndarray_view did not succeed. Maybe a shape mismatch?");
1276  return *this;
1277  }
1278 
1284  template<class OM>
1285  ndarray_view& assign(const ndarray<V, OM, L>& o, cudaStream_t stream = 0) {
1286  if (!this->copy_memory(o, false, stream))
1287  throw std::runtime_error("copying ndarray to ndarray_view did not succeed. Maybe a shape mismatch?");
1288  return *this;
1289  }
1290 
1296  template<class OM>
1297  ndarray_view& assign(const ndarray_view<V, OM, L>& o, cudaStream_t stream = 0) {
1298  if (!this->copy_memory(o, false, stream))
1299  throw std::runtime_error("copying ndarray to ndarray_view did not succeed. Maybe a shape mismatch?");
1300  return *this;
1301  }
1302 
1306  ndarray_view& operator=(const ndarray<V, M, L>& o) {
1307  return assign(o);
1308  }
1309 
1313  ndarray_view& operator=(const ndarray_view<V, M, L>& o) {
1314  return assign(o);
1315  }
1316 
1322  template<class _V>
1323  typename boost::enable_if_c<boost::is_convertible<_V, V>::value, ndarray_view&>::type operator=(
1324  const _V& scalar) {
1325  super::operator=(scalar);
1326  return *this;
1327  }
1328 
1334  template<class OM>
1335  ndarray_view& operator=(const ndarray<V, OM, L>& o) {
1336  return assign(o);
1337  }
1338 
1344  template<class OM>
1345  ndarray_view& operator=(const ndarray_view<V, OM, L>& o) {
1346  return assign(o);
1347  }
1348 
1376  template<int D, int E>
1377  explicit ndarray_view(const ndarray<V, M, L>& o, const index_gen<D, E>& idx) :
1378  ndarray<V, M, L>(o.m_allocator)
1379  {
1380  m_memory = o.mem();
1381  m_ptr = const_cast<V*>(o.ptr());
1382  std::vector<int> shapes;
1383  std::vector<int> strides;
1384  shapes.reserve(D);
1385  strides.reserve(D);
1386  cuvAssert(o.ndim()==D);
1387  for (size_t i = 0; i < D; i++) {
1388  int start = idx.ranges_[i].get_start(0);
1389  int finish = idx.ranges_[i].get_finish(o.shape(i));
1390  int stride = idx.ranges_[i].stride();
1391  if (start < 0)
1392  start += o.shape(i);
1393  if (finish < 0)
1394  finish += o.shape(i);
1395 #ifndef NDEBUG
1396  cuvAssert(finish>start);
1397 #endif
1398  m_ptr += start * o.stride(i);
1399  if (idx.ranges_[i].is_degenerate()) {
1400  // skip dimension
1401  } else {
1402  shapes.push_back((finish - start) / stride);
1403  strides.push_back(o.stride(i) * stride);
1404  }
1405  }
1406  // store in m_info
1407  m_info.resize(shapes.size());
1408  std::copy(shapes.begin(), shapes.end(), m_info.host_shape[0].ptr);
1409  std::copy(strides.begin(), strides.end(), m_info.host_stride[0].ptr);
1410  }
1411 
1419  template<int D, int E>
1420  explicit ndarray_view(const index_gen<D, E>& idx, const ndarray<V, M, L>& o) :
1421  ndarray<V, M, L>(o.m_allocator)
1422  {
1423  m_memory = o.mem();
1424  m_ptr = const_cast<V*>(o.ptr());
1425  std::vector<int> shapes;
1426  std::vector<int> strides;
1427  shapes.reserve(D);
1428  strides.reserve(D);
1429  cuvAssert(o.ndim()==D);
1430  for (size_t i = 0; i < D; i++) {
1431  int start = idx.ranges_[i].get_start(0);
1432  int finish = idx.ranges_[i].get_finish(o.shape(i));
1433  int stride = idx.ranges_[i].stride();
1434  if (start < 0)
1435  start += o.shape(i);
1436  if (finish < 0)
1437  finish += o.shape(i);
1438 #ifndef NDEBUG
1439  cuvAssert(finish>start);
1440 #endif
1441  m_ptr += start * o.stride(i);
1442  if (idx.ranges_[i].is_degenerate()) {
1443  // skip dimension
1444  } else {
1445  shapes.push_back((finish - start) / stride);
1446  strides.push_back(o.stride(i) * stride);
1447  }
1448  }
1449  // store in m_info
1450  m_info.resize(shapes.size());
1451  std::copy(shapes.begin(), shapes.end(), m_info.host_shape[0].ptr);
1452  std::copy(strides.begin(), strides.end(), m_info.host_stride[0].ptr);
1453  }
1454 };
1455  // data_structures
1463 template<class V, class V2, class M, class M2, class L>
1464 bool equal_shape(const ndarray<V, M, L>& a, const ndarray<V2, M2, L>& b) {
1465  return a.effective_shape() == b.effective_shape();
1466 }
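/**
 * Example (illustrative sketch): equal_shape() compares effective shapes, so
 * singleton dimensions and differing memory spaces do not matter:
 * @code
 * cuv::ndarray<float, cuv::host_memory_space> a(cuv::extents[5][1]);
 * cuv::ndarray<float, cuv::dev_memory_space>  b(cuv::extents[5]);
 * bool same = cuv::equal_shape(a, b);   // true, both have effective shape {5}
 * @endcode
 */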
1467 
1471 
1472 template<class Mat, class NewVT>
1473 struct switch_value_type {                          // same ndarray type, but with value type NewVT
1474  typedef ndarray<NewVT, typename Mat::memory_space_type, typename Mat::memory_layout_type> type;
1475 };
1477 template<class Mat, class NewML>
1478 struct switch_memory_layout_type {                  // same ndarray type, but with memory layout NewML
1479  typedef ndarray<typename Mat::value_type, typename Mat::memory_space_type, NewML> type;
1480 };
1482 template<class Mat, class NewMS>
1483 struct switch_memory_space_type {                   // same ndarray type, but with memory space NewMS
1484  typedef ndarray<typename Mat::value_type, NewMS, typename Mat::memory_layout_type> type;
1485 };
1486 
1489 }
1490 
1497 namespace std {
1498 
1504 template<class V>
1505 ostream& operator<<(ostream& o, const cuv::linear_memory<V, cuv::host_memory_space>& t) {
1506  o << "[ ";
1507  for (unsigned int i = 0; i < t.size(); i++)
1508  o << t[i] << " ";
1509  o << "]";
1510  return o;
1511 }
1512 
1518 template<class V>
1519 ostream& operator<<(ostream& o, const cuv::linear_memory<V, cuv::dev_memory_space>& t_) {
1520  cuv::linear_memory<V, cuv::host_memory_space> t = t_;   // copy device memory to host for printing
1521  o << "[ ";
1522  for (unsigned int i = 0; i < t.size(); i++)
1523  o << t[i] << " ";
1524  o << "]";
1525  return o;
1526 }
1527 
1533 template<class V>
1534 ostream& operator<<(ostream& o, const cuv::pitched_memory<V, cuv::host_memory_space>& t) {
1535  o << "[ ";
1536  for (unsigned int i = 0; i < t.rows(); i++) {
1537  for (unsigned int j = 0; j < t.cols(); j++) {   // iterate over columns, not rows
1538  o << t(i, j) << " ";
1539  }
1540  if (i < t.rows() - 1)
1541  o << std::endl;
1542  }
1543  o << "]";
1544  return o;
1545 }
1546 
1552 template<class V>
1553 ostream& operator<<(ostream& o, const cuv::pitched_memory<V, cuv::dev_memory_space>& t_) {
1554  cuv::pitched_memory<V, cuv::host_memory_space> t = t_;  // copy device memory to host for printing
1555  o << "[ ";
1556  for (unsigned int i = 0; i < t.rows(); i++) {
1557  for (unsigned int j = 0; j < t.cols(); j++) {   // iterate over columns, not rows
1558  o << t(i, j) << " ";
1559  }
1560  if (i < t.rows() - 1)
1561  o << std::endl;
1562  }
1563  o << "]";
1564  return o;
1565 }
1566 
1573 template<class V, class L>
1574 ostream& operator<<(ostream& o, const cuv::ndarray<V, cuv::dev_memory_space, L>& t) {
1575  return o << cuv::ndarray<V, cuv::host_memory_space, L>(t);
1576 }
1577 
1584 template<class V, class L>
1585 ostream& operator<<(ostream& o, const cuv::ndarray<V, cuv::host_memory_space, L>& t) {
1586  if (t.ndim() == 0)
1587  return o << "[]";
1588 
1589  if (t.ndim() == 1) {
1590  o << "[ ";
1591  for (unsigned int i = 0; i < t.shape(0); i++)
1592  o << t[i] << " ";
1593  return o << "]";
1594  }
1595  if (t.ndim() == 2) {
1596  o << "[";
1597  for (unsigned int i = 0; i < t.shape(0); ++i) {
1598  if (i > 0)
1599  o << " ";
1600  o << "[ ";
1601  for (unsigned int j = 0; j < t.shape(1); j++)
1602  o << t(i, j) << " ";
1603  o << "]";
1604  if (i != t.shape(0) - 1)
1605  o << std::endl;
1606  }
1607  return o << "]";
1608  }
1609  if (t.ndim() == 3) {
1610  o << "[" << std::endl;
1611  for (unsigned int l = 0; l < t.shape(0); l++) {
1612  o << "[";
1613  for (unsigned int i = 0; i < t.shape(1); ++i) {
1614  if (i > 0)
1615  o << " ";
1616  o << "[ ";
1617  //for(unsigned int j=0;j<t.shape(2);j++) o<< t(l,i,j)<<" ";
1618  for (unsigned int j = 0; j < t.shape(2); j++)
1619  o << t[l * t.shape(1) * t.shape(2) + i * t.shape(2) + j] << " ";
1620  o << "]";
1621  if (i != t.shape(1) - 1)
1622  o << std::endl;
1623  }
1624  o << "]";
1625  if (l < t.shape(0) - 1)
1626  o << std::endl;
1627  }
1628  return o << "]";
1629  }
1630  throw std::runtime_error("printing of ndarrays with >3 dimensions not implemented");
1631 }
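/**
 * Example (illustrative sketch): host and device ndarrays can be streamed
 * directly; device arrays are copied to the host before printing:
 * @code
 * cuv::ndarray<float, cuv::host_memory_space> h(cuv::extents[2][3]);
 * for (unsigned int i = 0; i < h.size(); i++)
 *     h[i] = static_cast<float>(i);
 * std::cout << h << std::endl;                      // prints the 2x3 array
 * cuv::ndarray<float, cuv::dev_memory_space> d(h);  // copy to the device
 * std::cout << d << std::endl;                      // copied back to host for printing
 * @endcode
 */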
1632 } // namespace std
1633 
1634 #endif