1 #ifndef __CUV_MEMORY_HPP__
2 #define __CUV_MEMORY_HPP__
4 #include <boost/make_shared.hpp>
5 #include <boost/shared_ptr.hpp>
6 #include <cuda_runtime_api.h>
10 #include "allocators.hpp"
11 #include "reference.hpp"
14 namespace serialization {
// NOTE(review): this whole region is a garbled extract — the original file's
// line numbers (49, 53, 57, ...) are fused into the text and interior lines
// are missing, so every declaration below is incomplete as shown. Verify
// against the upstream cuv memory.hpp before editing.
//
// The fragments appear to forward-declare the detail-namespace transfer
// helpers: several 1-D `copy` overloads (template headers only survive here)
// and four `copy2d` overloads for pitched 2-D transfers. The trailing
// parameters of each `copy2d` (presumably memory-space tag arguments and a
// cudaStream_t, selecting host/device transfer direction) fall on lines not
// visible in this extract — TODO confirm.
//
// --- 1-D copy overloads: single template parameter (same value type on both
// --- sides) — declarations cut after the template header.
49 template<
class value_type>
53 template<
class value_type>
// --- 1-D copy overloads: distinct source/destination value types
// --- (converting copies) — declarations cut after the template header.
57 template<
class value_type,
class value_type2>
61 template<
class value_type,
class value_type2>
65 template<
class value_type>
69 template<
class value_type>
73 template<
class value_type,
class value_type2>
77 template<
class value_type,
class value_type2>
// --- 2-D (pitched) copy overloads. Parameters mirror cudaMemcpy2D:
// --- dst/src pointers, destination and source pitches, then h and w.
// --- Each signature is truncated after `size_t w,` — the memory-space tag
// --- and stream parameters are on missing lines.
81 template<
class value_type,
class value_type2>
82 void copy2d(value_type* dst,
const value_type2* src,
size_t dpitch,
size_t spitch,
size_t h,
size_t w,
86 template<
class value_type,
class value_type2>
87 void copy2d(value_type* dst,
const value_type2* src,
size_t dpitch,
size_t spitch,
size_t h,
size_t w,
91 template<
class value_type,
class value_type2>
92 void copy2d(value_type* dst,
const value_type2* src,
size_t dpitch,
size_t spitch,
size_t h,
size_t w,
96 template<
class value_type,
class value_type2>
97 void copy2d(value_type* dst,
const value_type2* src,
size_t dpitch,
size_t spitch,
size_t h,
size_t w,
// NOTE(review): fragment of the `memory<V, M>` base class (V = value type,
// M = memory-space tag). Original line numbers are fused into the text and
// most of the class body (fields such as m_ptr/m_allocator, size(), ptr(),
// destructor) is on lines missing from this extract — verify against the
// upstream cuv memory.hpp.
104 template<
class V,
class M>
// Grants Boost.Serialization access to the class's private members.
117 friend class boost::serialization::access;
// Guards against allocations whose element count cannot be represented in
// the class's index_type; throws instead of silently truncating.
131 void check_size_limit(
size_t size)
const {
132 if (size > static_cast<size_t>(std::numeric_limits<index_type>::max())) {
133 throw std::runtime_error(
"maximum memory size exceeded");
// Fragment of (presumably) memsize(): total allocation size in bytes —
// element count times element size. TODO confirm enclosing function name.
156 return size() *
sizeof(V);
// Constructor taking the allocator used for all alloc/dealloc of this
// memory object (initializer list continues on missing lines).
166 explicit memory(
const boost::shared_ptr<allocator>& _allocator) :
// Copy `size` elements from `src` (living in memory space `m`) into `dst`;
// dispatches to detail::copy with this object's own space tag M() so the
// right host/device transfer is chosen. Asynchronous w.r.t. `stream` for
// device transfers — presumably; confirm in detail::copy.
189 template<
class value_type2,
class memory_space>
190 void copy_from(V* dst,
const value_type2* src,
size_t size, memory_space m, cudaStream_t stream) {
191 detail::copy(dst, src, size, M(), m, stream);
// Convenience overload: copy into this object's own buffer (m_ptr).
194 template<
class value_type2,
class memory_space>
195 void copy_from(
const value_type2* src,
size_t size, memory_space m, cudaStream_t stream) {
196 copy_from(
m_ptr, src, size, m, stream);
// 2-D (pitched) copy into `dst`; parameter order mirrors cudaMemcpy2D
// (dpitch/spitch, then h and w). Dispatches to detail::copy2d with M().
199 template<
class value_type2,
class memory_space>
200 void copy2d_from(V* dst,
const value_type2* src,
size_t dpitch,
size_t spitch,
size_t h,
size_t w,
201 memory_space m, cudaStream_t stream) {
202 detail::copy2d(dst, src, dpitch, spitch, h, w, M(), m, stream);
// Convenience overload: 2-D copy into this object's own buffer (m_ptr).
205 template<
class value_type2,
class memory_space>
206 void copy2d_from(
const value_type2* src,
size_t dpitch,
size_t spitch,
size_t h,
size_t w,
207 memory_space m, cudaStream_t stream) {
208 copy2d_from(
m_ptr, src, dpitch, spitch, h, w, m, stream);
// NOTE(review): fragment of `linear_memory<V, M>` (contiguous 1-D buffer,
// derived from memory<V, M>). Interior lines are missing throughout and the
// original line numbers are fused into the text — verify against upstream.
216 template<
class V,
class M>
231 friend class boost::serialization::access;
// Default constructor: forwards the (defaulted) allocator to the base class.
240 explicit linear_memory(
const boost::shared_ptr<allocator> _allocator = boost::make_shared<default_allocator>()) :
241 memory<V, M>(_allocator) {
// Fragment of a second constructor (its first parameter — presumably the
// element count — is on a missing line); same base-class delegation.
248 boost::make_shared<default_allocator>()) :
249 memory<V, M>(_allocator) {
// Fragment: only act when the requested size differs from the current one —
// presumably a reallocation guard in set_size(); TODO confirm.
263 if (s != this->
size()) {
// Precondition for allocation: buffer must not already be allocated.
272 assert(this->
m_ptr == NULL);
// Copy assignment fragments: use the default stream (0) for the transfer.
// NOTE(review): stream 0 serializes with all other streams (legacy default
// stream semantics) — presumably intentional for assignment.
296 cudaStream_t stream = 0;
297 this->copy_from(o, stream);
321 cudaStream_t stream = 0;
322 this->copy_from(o, stream);
// Stride setup, row-major variant: walk dimensions last-to-first; a
// dimension of extent 1 gets stride 0 (broadcast), otherwise the running
// size. The `size` accumulator's update is on a missing line.
374 for (
int i = shape.
size() - 1; i >= 0; --i) {
375 strides[i] = (shape[i] == 1) ? 0 : size;
378 this->check_size_limit(size);
// Stride setup, column-major variant: first-to-last traversal, same rule.
385 for (
size_t i = 0; i < shape.
size(); ++i) {
386 strides[i] = (shape[i] == 1) ? 0 : size;
389 this->check_size_limit(size);
// reverse() is host-only: the device specialization just throws.
398 throw std::runtime_error(
"reverse of dev linear memory not implemented");
// Host reverse fragment: classic two-iterator swap loop (std::reverse
// pattern); terminates when the iterators meet or cross.
401 if (__first == __last || __first == --__last)
404 std::iter_swap(__first, __last);
// Raw-pointer copy_from overload — body is on missing lines.
409 template<
class value_type2,
class memory_space>
410 void copy_from(
const value_type2* src,
size_t size, memory_space m, cudaStream_t stream) {
// Copy from another linear_memory (possibly different value type V2 and
// memory space OM); both space tags are passed so detail::copy can pick
// the correct transfer direction.
414 template<
class V2,
class OM>
415 void copy_from(
const linear_memory<V2, OM>& src, cudaStream_t stream)
const {
416 detail::copy(
m_ptr, src.ptr(), src.size(), M(), OM(), stream);
// NOTE(review): fragment of `pitched_memory<V, M>` — a 2-D buffer whose rows
// are padded to an aligned pitch (cudaMallocPitch-style layout). Interior
// lines are missing and original line numbers are fused into the text —
// verify against upstream cuv memory.hpp.
424 template<
class V,
class M>
441 friend class boost::serialization::access;
// size(): total element capacity including row padding (rows * pitch).
468 return m_rows * m_pitch;
// memsize() fragment: capacity in bytes.
473 return size() *
sizeof(V);
// Default constructor: empty buffer, zero geometry.
477 explicit pitched_memory(
const boost::shared_ptr<allocator> _allocator = boost::make_shared<default_allocator>()) :
478 memory<V, M>(_allocator), m_rows(0), m_cols(0), m_pitch(0) {
// Sizing constructor fragment: i rows by j columns; pitch is determined
// later by the allocator (starts at 0).
486 boost::make_shared<default_allocator>()) :
487 memory<V, M>(_allocator), m_rows(i), m_cols(j), m_pitch(0) {
// Allocation: must not already hold a buffer; alloc2d picks the pitch
// (in bytes or elements — TODO confirm units against allocators.hpp).
495 assert(this->
m_ptr == NULL);
497 m_allocator->alloc2d(reinterpret_cast<void**>(&this->
m_ptr), pitch, m_rows, m_cols,
sizeof(V),
499 assert(this->
m_ptr != NULL);
// Record capacity including padding.
503 m_size = m_rows * m_pitch;
// Geometry checks: reallocate/grow only when the request exceeds what the
// current pitch/rows can already accommodate (enclosing logic on missing
// lines — presumably set_size / assignment paths).
519 if (cols > m_pitch || rows > m_rows) {
542 if (m_pitch < o.m_cols || m_rows < o.m_rows) {
567 if (m_pitch < o.m_cols || m_rows < o.m_rows) {
// Stride setup, row-major: needs at least 2 dims; the last dimension is the
// pitched one and is treated specially in the (mostly missing) branch chain.
634 assert(shape.
size() >= 2);
635 const int pitched_dim = shape.
size() - 1;
636 for (
int i = shape.
size() - 1; i >= 0; --i) {
639 }
else if (i == pitched_dim) {
// Stride setup, column-major: same idea, first-to-last traversal
// (pitched_dim's definition for this variant is on a missing line).
656 assert(shape.
size() >= 2);
658 for (
unsigned int i = 0; i < shape.
size(); ++i) {
661 }
else if (i == pitched_dim) {
// 2-D copy from another memory object. NOTE(review): `src` is taken by
// value (`const memory<V2, OM> src`) — likely a missing `&`; confirm
// against upstream before "fixing".
671 template<
class V2,
class OM>
672 void copy2d_from(
const memory<V2, OM> src, cudaStream_t stream)
const {
674 m_rows, m_cols, M(), OM(), stream);
// Flat copy from another pitched_memory: copies size() elements, i.e.
// including row padding — valid since both sides share the pitched layout
// (presumably guaranteed by the callers; geometry check is elsewhere).
677 template<
class V2,
class OM>
678 void copy_from(
const pitched_memory<V2, OM>& src, cudaStream_t stream)
const {
679 detail::copy(
m_ptr, src.ptr(), src.size(), M(), OM(), stream);
690 inline bool is_c_contiguous(row_major,
const linear_memory<unsigned int, cuv::host_memory_space>& shape,
691 const linear_memory<int, cuv::host_memory_space>& stride) {
692 bool c_contiguous =
true;
694 for (
int i = shape.size() - 1; (i >= 0) && c_contiguous; --i) {
697 if (stride[i] != size)
698 c_contiguous =
false;
699 size = size * shape[i];
707 inline bool is_c_contiguous(column_major,
const linear_memory<unsigned int, cuv::host_memory_space>& shape,
708 const linear_memory<int, cuv::host_memory_space>& stride) {
709 bool c_contiguous =
true;
711 for (
unsigned int i = 0; i < shape.size() && c_contiguous; ++i) {
714 if (stride[i] != size)
715 c_contiguous =
false;
716 size = size * shape[i];
// NOTE(review): fragment — decides whether a row-major shape/stride pair can
// be transferred with a single pitched 2-D copy (cudaMemcpy2D-style): all
// dimensions contiguous except possibly the padding before the pitched
// (last non-trivial) dimension. The while-loop body, the first branch of the
// if/else chain, the final else branch and the return are on missing lines —
// too much control flow is absent to restore safely; verify against the
// upstream cuv memory.hpp.
722 inline bool is_2dcopyable(row_major,
const linear_memory<unsigned int, cuv::host_memory_space>& shape,
723 const linear_memory<int, cuv::host_memory_space>& stride) {
// A pitched copy needs at least two dimensions.
724 bool c_contiguous = shape.size() > 1;
// Pitched dim starts at the last dimension, then skips trailing trivial
// (extent-1, stride-1) dims — the decrement is on a missing line.
725 int pitched_dim = shape.size() - 1;
726 while (shape[pitched_dim] == 1 && stride[pitched_dim] == 1)
729 for (
int i = shape.size() - 1; (i >= 0) && c_contiguous; --i) {
732 }
// At the pitched dimension the stride of the next-slower dim absorbs the
// row padding (the pitch).
else if (i == pitched_dim) {
733 size *= stride[i - 1];
734 }
// Every other dimension must be exactly contiguous.
else if (stride[i] != size) {
735 c_contiguous =
false;
// NOTE(review): fragment — column-major counterpart of is_2dcopyable: the
// pitched dimension is the FIRST non-trivial one, and the scan runs
// first-to-last. The while-loop body, both elided branch bodies and the
// function's tail (return) are on missing lines / past the end of this
// extract — verify against the upstream cuv memory.hpp.
744 inline bool is_2dcopyable(column_major,
const linear_memory<unsigned int, cuv::host_memory_space>& shape,
745 const linear_memory<int, cuv::host_memory_space>& stride) {
// A pitched copy needs at least two dimensions.
746 bool c_contiguous = shape.size() > 1;
// Skip leading trivial (extent-1, stride-1) dims — increment is on a
// missing line.
747 unsigned int pitched_dim = 0;
748 while (shape[pitched_dim] == 1 && stride[pitched_dim] == 1)
751 for (
unsigned int i = 0; (i < shape.size()) && c_contiguous; ++i) {
754 }
// Pitched-dimension branch body is on a missing line (presumably absorbs
// the pitch into `size`, mirroring the row-major overload).
else if (i == pitched_dim) {
756 }
// Every other dimension must be exactly contiguous.
else if (stride[i] != size) {
757 c_contiguous =
false;