memory.hpp

#ifndef __CUV_MEMORY_HPP__
#define __CUV_MEMORY_HPP__

#include <algorithm> // std::iter_swap
#include <boost/make_shared.hpp>
#include <boost/shared_ptr.hpp>
#include <cassert>
#include <cuda_runtime_api.h>
#include <limits>
#include <stdexcept>

#include "allocators.hpp"
#include "reference.hpp"

namespace boost {
namespace serialization {
class access;
}
}

namespace cuv {

/// tag for column-major memory layout
struct column_major {
};

/// tag for row-major memory layout
struct row_major {
};

/// tag for linear memory
struct linear_memory_tag {
};

/// tag for pitched memory
struct pitched_memory_tag {
};
// tags
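
/*
 * Illustrative note (not part of the original header): these empty structs are
 * tag types. They are passed by value so that overload resolution picks the
 * right implementation at compile time, e.g. in this hypothetical sketch:
 *
 * @code
 * const char* layout_name(row_major)    { return "row-major"; }
 * const char* layout_name(column_major) { return "column-major"; }
 * // layout_name(row_major()) yields "row-major"
 * @endcode
 */
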
namespace detail {

/// copy from host to host
template<class value_type>
void copy(value_type* dst, const value_type* src, size_t size, host_memory_space, host_memory_space, cudaStream_t);

/// copy from device to host
template<class value_type>
void copy(value_type* dst, const value_type* src, size_t size, host_memory_space, dev_memory_space, cudaStream_t);

/// copy from host to host, converting between value types
template<class value_type, class value_type2>
void copy(value_type* dst, const value_type2* src, size_t size, host_memory_space, host_memory_space, cudaStream_t);

/// copy from device to host, converting between value types
template<class value_type, class value_type2>
void copy(value_type* dst, const value_type2* src, size_t size, host_memory_space, dev_memory_space, cudaStream_t);

/// copy from host to device
template<class value_type>
void copy(value_type* dst, const value_type* src, size_t size, dev_memory_space, host_memory_space, cudaStream_t);

/// copy from device to device
template<class value_type>
void copy(value_type* dst, const value_type* src, size_t size, dev_memory_space, dev_memory_space, cudaStream_t);

/// copy from host to device, converting between value types
template<class value_type, class value_type2>
void copy(value_type* dst, const value_type2* src, size_t size, dev_memory_space, host_memory_space, cudaStream_t);

/// copy from device to device, converting between value types
template<class value_type, class value_type2>
void copy(value_type* dst, const value_type2* src, size_t size, dev_memory_space, dev_memory_space, cudaStream_t);

/// 2D copy from host to host
template<class value_type, class value_type2>
void copy2d(value_type* dst, const value_type2* src, size_t dpitch, size_t spitch, size_t h, size_t w,
        host_memory_space, host_memory_space, cudaStream_t);

/// 2D copy from device to host
template<class value_type, class value_type2>
void copy2d(value_type* dst, const value_type2* src, size_t dpitch, size_t spitch, size_t h, size_t w,
        host_memory_space, dev_memory_space, cudaStream_t);

/// 2D copy from host to device
template<class value_type, class value_type2>
void copy2d(value_type* dst, const value_type2* src, size_t dpitch, size_t spitch, size_t h, size_t w,
        dev_memory_space, host_memory_space, cudaStream_t);

/// 2D copy from device to device
template<class value_type, class value_type2>
void copy2d(value_type* dst, const value_type2* src, size_t dpitch, size_t spitch, size_t h, size_t w,
        dev_memory_space, dev_memory_space, cudaStream_t);
}
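
/*
 * Illustrative sketch (not part of the original header): the two memory-space
 * tag parameters of detail::copy select the copy direction at compile time;
 * the first tag is the destination's space, the second the source's. A
 * host-to-device overload could be implemented roughly like this (hypothetical
 * body, assuming the CUDA runtime API):
 *
 * @code
 * template<class V>
 * void copy(V* dst, const V* src, size_t size,
 *           dev_memory_space, host_memory_space, cudaStream_t stream) {
 *     // dst lives on the device, src on the host
 *     cudaMemcpyAsync(dst, src, size * sizeof(V), cudaMemcpyHostToDevice, stream);
 * }
 * @endcode
 */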

/**
 * simple memory owner: keeps a pointer, its size, and the allocator that
 * created it, and deallocates owned memory on destruction.
 */
template<class V, class M>
class memory {

public:
    typedef typename unconst<V>::type value_type; ///< type of contained values
    typedef const V const_value_type; ///< const version of value_type
    typedef M memory_space_type; ///< host or device memory space
    typedef unsigned int size_type; ///< type used for sizes
    typedef int index_type; ///< type used for indexing
    typedef reference<V, M> reference_type; ///< reference to a value, possibly on the device
    typedef const reference<V, M> const_reference_type; ///< const version of reference_type

private:
    friend class boost::serialization::access;

    /// not implemented: memory is non-copyable
    memory(const memory&);

    /// not implemented: memory is non-assignable
    memory& operator=(const memory& o);

protected:
    V* m_ptr; ///< points to the allocated memory
    size_type m_size; ///< number of stored values
    boost::shared_ptr<allocator> m_allocator; ///< how the memory was allocated
    bool m_owned; ///< whether this instance owns (and must deallocate) m_ptr

    /// throw if size does not fit into index_type
    void check_size_limit(size_t size) const {
        if (size > static_cast<size_t>(std::numeric_limits<index_type>::max())) {
            throw std::runtime_error("maximum memory size exceeded");
        }
    }

public:

    /// @return the raw pointer
    V* ptr() {
        return m_ptr;
    }

    /// @return the raw pointer (const version)
    const V* ptr() const {
        return m_ptr;
    }

    /// @return the number of stored values
    size_type size() const {
        return m_size;
    }

    /// @return the size of the allocation in bytes
    size_type memsize() const {
        return size() * sizeof(V);
    }

    /// reset pointer and size (does not deallocate)
    void reset(V* p, size_type s) {
        m_ptr = p;
        m_size = s;
    }

    /// construct empty memory using the given allocator
    explicit memory(const boost::shared_ptr<allocator>& _allocator) :
            m_ptr(NULL), m_size(0), m_allocator(_allocator), m_owned(true) {
    }

    /// construct from a pre-existing pointer; if owned, the destructor deallocates it
    explicit memory(value_type* ptr, size_type size, const boost::shared_ptr<allocator>& _allocator, bool owned = true) :
            m_ptr(ptr), m_size(size), m_allocator(_allocator), m_owned(owned) {
    }

    /// destructor: deallocates owned memory
    ~memory() {
        dealloc();
    }

    /// deallocate the memory if owned
    void dealloc() {
        if (m_ptr && m_owned) {
            m_allocator->dealloc(reinterpret_cast<void**>(&this->m_ptr), memory_space_type());
        }
        m_ptr = NULL;
        m_size = 0;
    }

    /// copy size values from a raw pointer in memory space m to dst
    template<class value_type2, class memory_space>
    void copy_from(V* dst, const value_type2* src, size_t size, memory_space m, cudaStream_t stream) {
        detail::copy(dst, src, size, M(), m, stream);
    }

    /// copy size values from a raw pointer in memory space m to this memory
    template<class value_type2, class memory_space>
    void copy_from(const value_type2* src, size_t size, memory_space m, cudaStream_t stream) {
        copy_from(m_ptr, src, size, m, stream);
    }

    /// 2D copy from a raw pointer in memory space m to dst
    template<class value_type2, class memory_space>
    void copy2d_from(V* dst, const value_type2* src, size_t dpitch, size_t spitch, size_t h, size_t w,
            memory_space m, cudaStream_t stream) {
        detail::copy2d(dst, src, dpitch, spitch, h, w, M(), m, stream);
    }

    /// 2D copy from a raw pointer in memory space m to this memory
    template<class value_type2, class memory_space>
    void copy2d_from(const value_type2* src, size_t dpitch, size_t spitch, size_t h, size_t w,
            memory_space m, cudaStream_t stream) {
        copy2d_from(m_ptr, src, dpitch, spitch, h, w, m, stream);
    }

};
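
/*
 * Minimal usage sketch (illustrative, not part of the original header):
 * wrapping an externally allocated host buffer in a non-owning memory object.
 * The buffer name and size are made up for the example.
 *
 * @code
 * std::vector<float> buf(256);
 * boost::shared_ptr<cuv::allocator> alloc = boost::make_shared<cuv::default_allocator>();
 * // owned=false: the destructor will NOT free buf's storage
 * cuv::memory<float, cuv::host_memory_space> mem(&buf[0], buf.size(), alloc, false);
 * @endcode
 */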

/**
 * contiguous (linear) memory
 */
template<class V, class M>
class linear_memory: public memory<V, M> {
private:
    typedef memory<V, M> super;
public:
    typedef typename super::value_type value_type;
    typedef typename super::const_value_type const_value_type;
    typedef typename super::memory_space_type memory_space_type;
    typedef typename super::index_type index_type;
    typedef typename super::size_type size_type;
    typedef typename super::reference_type reference_type;
    typedef typename super::const_reference_type const_reference_type;

private:

    friend class boost::serialization::access;
    typedef linear_memory<V, M> my_type;
    using super::m_ptr;
    using super::m_size;
    using super::m_allocator;

public:

    /// default-construct empty linear memory
    explicit linear_memory(const boost::shared_ptr<allocator> _allocator = boost::make_shared<default_allocator>()) :
            memory<V, M>(_allocator) {
    }

    /**
     * construct linear memory holding i values
     * @param i number of values
     */
    explicit linear_memory(size_type i, const boost::shared_ptr<allocator> _allocator =
            boost::make_shared<default_allocator>()) :
            memory<V, M>(_allocator) {
        m_size = i;
        alloc();
    }

    /// @return the pointer, transferring ownership to the caller (who must deallocate)
    value_type* release() {
        value_type* ptr = m_ptr;
        m_ptr = NULL;
        return ptr;
    }

    /// resize; deallocates and reallocates if the size changes
    void set_size(size_type s) {
        if (s != this->size()) {
            this->dealloc();
            m_size = s;
            alloc();
        }
    }

    /// allocate m_size values using the allocator
    void alloc() {
        assert(this->m_ptr == NULL);
        if (m_size > 0)
            m_allocator->alloc(reinterpret_cast<void**>(&m_ptr), m_size, sizeof(V), memory_space_type());
    }

    /// assignment: resize if necessary, then copy
    my_type& operator=(const my_type& o) {
        if (this == &o)
            return *this;

        if (this->size() != o.size()) {
            this->dealloc();
            m_size = o.size();
            this->alloc();
        }

        // TODO async copy
        cudaStream_t stream = 0;
        this->copy_from(o, stream);

        return *this;
    }

    /// assignment from linear memory in another memory space: resize if necessary, then copy
    template<class OM>
    my_type& operator=(const linear_memory<V, OM>& o) {
        if (this->size() != o.size()) {
            this->dealloc();
            m_size = o.size();
            this->alloc();
        }

        // TODO async copy
        cudaStream_t stream = 0;
        this->copy_from(o, stream);
        return *this;
    }

    /// copy constructor
    explicit linear_memory(const my_type& o) :
            memory<V, M>(o.m_allocator) {
        operator=(o);
    }

    /// copy constructor from linear memory in another memory space
    template<class OM>
    explicit linear_memory(const linear_memory<V, OM>& o) :
            memory<V, M>(o.m_allocator) {
        operator=(o);
    }

    /// @return a reference to the value at index idx
    reference_type operator[](const index_type& idx) {
        assert(idx >= 0);
        assert((size_type) idx < m_size);
        return reference_type(this->m_ptr + idx);
    }

    /// @return a const reference to the value at index idx
    const_reference_type operator[](const index_type& idx) const {
        assert(idx >= 0);
        assert((size_type) idx < m_size);
        return const_reference_type(this->m_ptr + idx);
    }

    /// destructor: deallocates the memory
    ~linear_memory() {
        this->dealloc();
    }

    /// set strides for this memory, assuming row-major layout (dimensions of extent 1 get stride 0)
    void set_strides(linear_memory<int, host_memory_space>& strides,
            const linear_memory<unsigned int, host_memory_space>& shape, row_major) {
        size_t size = 1;
        for (int i = shape.size() - 1; i >= 0; --i) {
            strides[i] = (shape[i] == 1) ? 0 : size;
            size *= shape[i];
        }
        this->check_size_limit(size);
    }

    /// set strides for this memory, assuming column-major layout (dimensions of extent 1 get stride 0)
    void set_strides(linear_memory<int, host_memory_space>& strides,
            const linear_memory<unsigned int, host_memory_space>& shape, column_major) {
        size_t size = 1;
        for (size_t i = 0; i < shape.size(); ++i) {
            strides[i] = (shape[i] == 1) ? 0 : size;
            size *= shape[i];
        }
        this->check_size_limit(size);
    }

    /// reverse the stored sequence in place (host memory only)
    void reverse() {
        if (IsSame<dev_memory_space, memory_space_type>::Result::value)
            throw std::runtime_error("reverse of dev linear memory not implemented");
        value_type* __first = m_ptr, *__last = m_ptr + this->size();
        while (true)
            if (__first == __last || __first == --__last)
                return;
            else {
                std::iter_swap(__first, __last);
                ++__first;
            }
    }

    /// copy size values from a raw pointer in memory space m
    template<class value_type2, class memory_space>
    void copy_from(const value_type2* src, size_t size, memory_space m, cudaStream_t stream) {
        memory<V, M>::copy_from(src, size, m, stream);
    }

    /// copy from another linear_memory, possibly in another memory space
    template<class V2, class OM>
    void copy_from(const linear_memory<V2, OM>& src, cudaStream_t stream) const {
        detail::copy(m_ptr, src.ptr(), src.size(), M(), OM(), stream);
    }

};

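/*
 * Minimal usage sketch (illustrative, not part of the original header):
 * allocate on host and device, fill, and copy host -> device.
 *
 * @code
 * using namespace cuv;
 * linear_memory<float, host_memory_space> h(100);   // 100 floats on the host
 * linear_memory<float, dev_memory_space>  d(100);   // 100 floats on the device
 * for (int i = 0; i < 100; i++)
 *     h[i] = 1.0f;                                  // operator[] returns a reference_type
 * d.copy_from(h, 0);                                // copy on the default stream
 * @endcode
 */
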
/**
 * two-dimensional memory whose rows may be padded to a pitch
 */
template<class V, class M>
class pitched_memory: public memory<V, M> {

private:
    typedef memory<V, M> super;

public:

    typedef typename super::value_type value_type;
    typedef typename super::const_value_type const_value_type;
    typedef typename super::memory_space_type memory_space_type;
    typedef typename super::index_type index_type;
    typedef typename super::size_type size_type;
    typedef typename super::reference_type reference_type;
    typedef typename super::const_reference_type const_reference_type;

private:
    friend class boost::serialization::access;
    typedef pitched_memory<V, M> my_type;
    size_type m_rows; ///< number of rows
    size_type m_cols; ///< number of columns
    size_type m_pitch; ///< row length including padding, in values
    using super::m_ptr;
    using super::m_size;
    using super::m_allocator;

public:

    /// @return the number of rows
    size_type rows() const {
        return m_rows;
    }

    /// @return the number of columns
    size_type cols() const {
        return m_cols;
    }

    /// @return the pitch (row length including padding, in values)
    size_type pitch() const {
        return m_pitch;
    }

    /// @return the number of allocated values, including padding
    size_type size() const {
        return m_rows * m_pitch;
    }

    /// @return the size of the allocation in bytes
    size_type memsize() const {
        return size() * sizeof(V);
    }

    /// default-construct empty pitched memory
    explicit pitched_memory(const boost::shared_ptr<allocator> _allocator = boost::make_shared<default_allocator>()) :
            memory<V, M>(_allocator), m_rows(0), m_cols(0), m_pitch(0) {
    }

    /**
     * construct pitched memory with i rows and j columns
     */
    explicit pitched_memory(index_type i, index_type j, const boost::shared_ptr<allocator> _allocator =
            boost::make_shared<default_allocator>()) :
            memory<V, M>(_allocator), m_rows(i), m_cols(j), m_pitch(0) {
        alloc();
    }

    /// allocate a pitched 2D block of m_rows x m_cols values
    void alloc() {
        assert(this->m_ptr == NULL);
        size_t pitch;
        m_allocator->alloc2d(reinterpret_cast<void**>(&this->m_ptr), pitch, m_rows, m_cols, sizeof(V),
                memory_space_type());
        assert(this->m_ptr != NULL);
        m_pitch = pitch;
        assert(m_pitch % sizeof(value_type) == 0);
        m_pitch /= sizeof(value_type); // store the pitch in values, not bytes
        m_size = m_rows * m_pitch; // in class memory
    }

    /// @return the pointer, transferring ownership to the caller (who must deallocate)
    value_type* release() {
        value_type* ptr = m_ptr;
        m_ptr = NULL;
        return ptr;
    }

    /// resize to rows x cols; reallocates only if the current allocation is too small
    void set_size(size_type rows, size_type cols) {
        if (cols > m_pitch || rows > m_rows) {
            this->dealloc();
            m_rows = rows;
            m_cols = cols;
            this->alloc();
        } else {
            m_rows = rows;
            m_cols = cols;
        }
    }

    /// assignment: reallocate if necessary, then copy
    my_type& operator=(const my_type& o) {
        if (this == &o)
            return *this;

        if (m_pitch < o.m_cols || m_rows < o.m_rows) {
            this->dealloc();
            m_cols = o.m_cols;
            m_rows = o.m_rows;
            this->alloc();
        }
        m_cols = o.m_cols;
        m_rows = o.m_rows;
        // TODO async copy
        cudaStream_t stream = 0;
        this->copy_from(o, stream);
        return *this;
    }

    /// assignment from pitched memory in another memory space
    template<class OM>
    my_type&
    operator=(const pitched_memory<V, OM>& o) {
        if (m_pitch < o.cols() || m_rows < o.rows()) {
            this->dealloc();
            m_cols = o.cols();
            m_rows = o.rows();
            this->alloc();
        }
        m_cols = o.cols();
        m_rows = o.rows();
        // TODO async copy
        cudaStream_t stream = 0;
        this->copy_from(o, stream);
        return *this;
    }

    /// @return a reference to the value at linear index idx (row-major, skipping padding)
    reference_type operator[](const index_type& idx) {
        assert(idx >= 0);
        index_type row = idx / m_cols;
        index_type col = idx % m_cols;
        assert((size_type) row < m_rows);
        assert((size_type) col < m_cols);
        return reference_type(this->m_ptr + row * m_pitch + col);
    }

    /// const version of the above
    const_reference_type operator[](const index_type& idx) const {
        return const_cast<pitched_memory&>(*this)[idx];
    }

601 
610  assert(i >= 0);
611  assert(j >= 0);
612  assert((size_type) i < m_rows);
613  assert((size_type) j < m_cols);
614  return reference_type(this->m_ptr + i * m_pitch + j);
615  }
618  return const_cast<pitched_memory&>(*this)(i, j);
619  }
620 
    /**
     * set strides for this memory, assuming row-major layout; the last
     * (pitched) dimension gets stride 1 and contributes pitch() to the size
     */
    void set_strides(linear_memory<int, host_memory_space>& strides,
            const linear_memory<unsigned int, host_memory_space>& shape, row_major) {
        size_type size = 1;
        assert(shape.size() >= 2);
        const int pitched_dim = shape.size() - 1;
        for (int i = shape.size() - 1; i >= 0; --i) {
            if (shape[i] == 1) {
                strides[i] = 0;
            } else if (i == pitched_dim) {
                strides[i] = 1;
                size *= pitch();
            } else {
                strides[i] = size;
                size *= shape[i];
            }
        }
    }

    /**
     * set strides for this memory, assuming column-major layout; the first
     * (pitched) dimension gets stride 1 and contributes pitch() to the size
     */
    void set_strides(linear_memory<int, host_memory_space>& strides,
            const linear_memory<unsigned int, host_memory_space>& shape, column_major) {
        size_type size = 1;
        assert(shape.size() >= 2);
        const size_type pitched_dim = 0;
        for (unsigned int i = 0; i < shape.size(); ++i) {
            if (shape[i] == 1) {
                strides[i] = 0;
            } else if (i == pitched_dim) {
                strides[i] = 1;
                size *= pitch();
            } else {
                strides[i] = size;
                size *= shape[i];
            }
        }
    }

    /// 2D copy from another pitched_memory, possibly in another memory space
    template<class V2, class OM>
    void copy2d_from(const pitched_memory<V2, OM>& src, cudaStream_t stream) const {
        // note: m_pitch and src.pitch() are in values, not bytes (see alloc())
        detail::copy2d(m_ptr, src.ptr(), m_pitch, src.pitch(),
                m_rows, m_cols, M(), OM(), stream);
    }

    /// copy from another pitched_memory, possibly in another memory space
    template<class V2, class OM>
    void copy_from(const pitched_memory<V2, OM>& src, cudaStream_t stream) const {
        detail::copy(m_ptr, src.ptr(), src.size(), M(), OM(), stream);
    }

};
// data_structures
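
/*
 * Minimal usage sketch (illustrative, not part of the original header):
 * pitched memory pads each row, so the pitch may exceed the column count.
 *
 * @code
 * using namespace cuv;
 * pitched_memory<float, host_memory_space> pm(3, 100); // 3 rows, 100 columns
 * assert(pm.pitch() >= pm.cols());                     // rows may be padded
 * pm(2, 99) = 1.0f;                                    // element access via operator()
 * @endcode
 */
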
namespace detail {

/// @return whether a (shape, stride) pair describes C-contiguous row-major memory
inline bool is_c_contiguous(row_major, const linear_memory<unsigned int, cuv::host_memory_space>& shape,
        const linear_memory<int, cuv::host_memory_space>& stride) {
    bool c_contiguous = true;
    int size = 1;
    for (int i = shape.size() - 1; (i >= 0) && c_contiguous; --i) {
        if (shape[i] == 1)
            continue;
        if (stride[i] != size)
            c_contiguous = false;
        size = size * shape[i];
    }
    return c_contiguous;
}

/// @return whether a (shape, stride) pair describes contiguous column-major memory
inline bool is_c_contiguous(column_major, const linear_memory<unsigned int, cuv::host_memory_space>& shape,
        const linear_memory<int, cuv::host_memory_space>& stride) {
    bool c_contiguous = true;
    int size = 1;
    for (unsigned int i = 0; i < shape.size() && c_contiguous; ++i) {
        if (shape[i] == 1)
            continue;
        if (stride[i] != size)
            c_contiguous = false;
        size = size * shape[i];
    }
    return c_contiguous;
}

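/*
 * Worked example (illustrative, not part of the original header): for a
 * row-major array of shape (2, 3, 4), the contiguous strides are (12, 4, 1).
 * The row-major check above walks from the last dimension inward, verifying
 * stride[i] == size and then multiplying size by shape[i]: 1*4 = 4, 4*3 = 12.
 */
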
/// @return whether a row-major (shape, stride) pair can be copied with a single 2D memcpy
inline bool is_2dcopyable(row_major, const linear_memory<unsigned int, cuv::host_memory_space>& shape,
        const linear_memory<int, cuv::host_memory_space>& stride) {
    bool c_contiguous = shape.size() > 1;
    int pitched_dim = shape.size() - 1; // last dim
    while (shape[pitched_dim] == 1 && stride[pitched_dim] == 1)
        pitched_dim--;
    int size = 1;
    for (int i = shape.size() - 1; (i >= 0) && c_contiguous; --i) {
        if (shape[i] == 1) {
            continue;
        } else if (i == pitched_dim) {
            size *= stride[i - 1];
        } else if (stride[i] != size) {
            c_contiguous = false;
        } else {
            size *= shape[i];
        }
    }
    return c_contiguous;
}

/// @return whether a column-major (shape, stride) pair can be copied with a single 2D memcpy
inline bool is_2dcopyable(column_major, const linear_memory<unsigned int, cuv::host_memory_space>& shape,
        const linear_memory<int, cuv::host_memory_space>& stride) {
    bool c_contiguous = shape.size() > 1;
    unsigned int pitched_dim = 0;
    while (shape[pitched_dim] == 1 && stride[pitched_dim] == 1)
        pitched_dim++;
    int size = 1;
    for (unsigned int i = 0; (i < shape.size()) && c_contiguous; ++i) {
        if (shape[i] == 1) {
            continue;
        } else if (i == pitched_dim) {
            size *= stride[i];
        } else if (stride[i] != size) {
            c_contiguous = false;
        } else {
            size *= shape[i];
        }
    }
    return c_contiguous;
}

}
}

#endif