memory.hpp

#ifndef __CUV_MEMORY_HPP__
#define __CUV_MEMORY_HPP__

#include <algorithm> // std::iter_swap
#include <boost/make_shared.hpp>
#include <boost/shared_ptr.hpp>
#include <cassert>
#include <cuda_runtime_api.h>
#include <limits>
#include <stdexcept>

#include "allocators.hpp"
#include "reference.hpp"

namespace boost {
namespace serialization {
class access;
}
}

namespace cuv {

/// tag for column-major memory layout
struct column_major {
};

/// tag for row-major memory layout
struct row_major {
};

/// tag for linear memory
struct linear_memory_tag {
};

/// tag for pitched memory
struct pitched_memory_tag {
};
// tags
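
/*
 * Illustrative note (not part of the original header): these empty structs are
 * tag types. They are passed by value so that overload resolution picks the
 * right implementation at compile time, e.g. in this hypothetical sketch:
 *
 * @code
 * const char* layout_name(row_major)    { return "row-major"; }
 * const char* layout_name(column_major) { return "column-major"; }
 * // layout_name(row_major()) yields "row-major"
 * @endcode
 */
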
namespace detail {

/// copy from host to host
template<class value_type>
void copy(value_type* dst, const value_type* src, size_t size, host_memory_space, host_memory_space, cudaStream_t);

/// copy from device to host
template<class value_type>
void copy(value_type* dst, const value_type* src, size_t size, host_memory_space, dev_memory_space, cudaStream_t);

/// copy from host to host, converting between value types
template<class value_type, class value_type2>
void copy(value_type* dst, const value_type2* src, size_t size, host_memory_space, host_memory_space, cudaStream_t);

/// copy from device to host, converting between value types
template<class value_type, class value_type2>
void copy(value_type* dst, const value_type2* src, size_t size, host_memory_space, dev_memory_space, cudaStream_t);

/// copy from host to device
template<class value_type>
void copy(value_type* dst, const value_type* src, size_t size, dev_memory_space, host_memory_space, cudaStream_t);

/// copy from device to device
template<class value_type>
void copy(value_type* dst, const value_type* src, size_t size, dev_memory_space, dev_memory_space, cudaStream_t);

/// copy from host to device, converting between value types
template<class value_type, class value_type2>
void copy(value_type* dst, const value_type2* src, size_t size, dev_memory_space, host_memory_space, cudaStream_t);

/// copy from device to device, converting between value types
template<class value_type, class value_type2>
void copy(value_type* dst, const value_type2* src, size_t size, dev_memory_space, dev_memory_space, cudaStream_t);

/// 2D copy from host to host
template<class value_type, class value_type2>
void copy2d(value_type* dst, const value_type2* src, size_t dpitch, size_t spitch, size_t h, size_t w,
        host_memory_space, host_memory_space, cudaStream_t);

/// 2D copy from device to host
template<class value_type, class value_type2>
void copy2d(value_type* dst, const value_type2* src, size_t dpitch, size_t spitch, size_t h, size_t w,
        host_memory_space, dev_memory_space, cudaStream_t);

/// 2D copy from host to device
template<class value_type, class value_type2>
void copy2d(value_type* dst, const value_type2* src, size_t dpitch, size_t spitch, size_t h, size_t w,
        dev_memory_space, host_memory_space, cudaStream_t);

/// 2D copy from device to device
template<class value_type, class value_type2>
void copy2d(value_type* dst, const value_type2* src, size_t dpitch, size_t spitch, size_t h, size_t w,
        dev_memory_space, dev_memory_space, cudaStream_t);
}
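
/*
 * Illustrative sketch (not part of the original header): the two memory-space
 * tag parameters of detail::copy select the copy direction at compile time;
 * the first tag is the destination's space, the second the source's. A
 * host-to-device overload could be implemented roughly like this (hypothetical
 * body, assuming the CUDA runtime API):
 *
 * @code
 * template<class V>
 * void copy(V* dst, const V* src, size_t size,
 *           dev_memory_space, host_memory_space, cudaStream_t stream) {
 *     // dst lives on the device, src on the host
 *     cudaMemcpyAsync(dst, src, size * sizeof(V), cudaMemcpyHostToDevice, stream);
 * }
 * @endcode
 */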

/**
 * simple memory owner: keeps a pointer, its size, and the allocator that
 * created it, and deallocates owned memory on destruction.
 */
template<class V, class M>
class memory {

public:
    typedef typename unconst<V>::type value_type; ///< type of contained values
    typedef const V const_value_type; ///< const version of value_type
    typedef M memory_space_type; ///< host or device memory space
    typedef unsigned int size_type; ///< type used for sizes
    typedef int index_type; ///< type used for indexing
    typedef reference<V, M> reference_type; ///< reference to a value, possibly on the device
    typedef const reference<V, M> const_reference_type; ///< const version of reference_type

private:
    friend class boost::serialization::access;

    /// not implemented: memory is non-copyable
    memory(const memory&);

    /// not implemented: memory is non-assignable
    memory& operator=(const memory& o);

protected:
    V* m_ptr; ///< points to the allocated memory
    size_type m_size; ///< number of stored values
    boost::shared_ptr<allocator> m_allocator; ///< how the memory was allocated
    bool m_owned; ///< whether this instance owns (and must deallocate) m_ptr

    /// throw if size does not fit into index_type
    void check_size_limit(size_t size) const {
        if (size > static_cast<size_t>(std::numeric_limits<index_type>::max())) {
            throw std::runtime_error("maximum memory size exceeded");
        }
    }

public:

    /// @return the raw pointer
    V* ptr() {
        return m_ptr;
    }

    /// @return the raw pointer (const version)
    const V* ptr() const {
        return m_ptr;
    }

    /// @return the number of stored values
    size_type size() const {
        return m_size;
    }

    /// @return the size of the allocation in bytes
    size_type memsize() const {
        return size() * sizeof(V);
    }

    /// reset pointer and size (does not deallocate)
    void reset(V* p, size_type s) {
        m_ptr = p;
        m_size = s;
    }

    /// construct empty memory using the given allocator
    explicit memory(const boost::shared_ptr<allocator>& _allocator) :
            m_ptr(NULL), m_size(0), m_allocator(_allocator), m_owned(true) {
    }

    /// construct from a pre-existing pointer; if owned, the destructor deallocates it
    explicit memory(value_type* ptr, size_type size, const boost::shared_ptr<allocator>& _allocator, bool owned = true) :
            m_ptr(ptr), m_size(size), m_allocator(_allocator), m_owned(owned) {
    }

    /// destructor: deallocates owned memory
    ~memory() {
        dealloc();
    }

    /// deallocate the memory if owned
    void dealloc() {
        if (m_ptr && m_owned) {
            m_allocator->dealloc(reinterpret_cast<void**>(&this->m_ptr), memory_space_type());
        }
        m_ptr = NULL;
        m_size = 0;
    }

    /// copy size values from a raw pointer in memory space m to dst
    template<class value_type2, class memory_space>
    void copy_from(V* dst, const value_type2* src, size_t size, memory_space m, cudaStream_t stream) {
        detail::copy(dst, src, size, M(), m, stream);
    }

    /// copy size values from a raw pointer in memory space m to this memory
    template<class value_type2, class memory_space>
    void copy_from(const value_type2* src, size_t size, memory_space m, cudaStream_t stream) {
        copy_from(m_ptr, src, size, m, stream);
    }

    /// 2D copy from a raw pointer in memory space m to dst
    template<class value_type2, class memory_space>
    void copy2d_from(V* dst, const value_type2* src, size_t dpitch, size_t spitch, size_t h, size_t w,
            memory_space m, cudaStream_t stream) {
        detail::copy2d(dst, src, dpitch, spitch, h, w, M(), m, stream);
    }

    /// 2D copy from a raw pointer in memory space m to this memory
    template<class value_type2, class memory_space>
    void copy2d_from(const value_type2* src, size_t dpitch, size_t spitch, size_t h, size_t w,
            memory_space m, cudaStream_t stream) {
        copy2d_from(m_ptr, src, dpitch, spitch, h, w, m, stream);
    }

};
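
/*
 * Minimal usage sketch (illustrative, not part of the original header):
 * wrapping an externally allocated host buffer in a non-owning memory object.
 * The buffer name and size are made up for the example.
 *
 * @code
 * std::vector<float> buf(256);
 * boost::shared_ptr<cuv::allocator> alloc = boost::make_shared<cuv::default_allocator>();
 * // owned=false: the destructor will NOT free buf's storage
 * cuv::memory<float, cuv::host_memory_space> mem(&buf[0], buf.size(), alloc, false);
 * @endcode
 */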

/**
 * contiguous (linear) memory
 */
template<class V, class M>
class linear_memory: public memory<V, M> {
private:
    typedef memory<V, M> super;
public:
    typedef typename super::value_type value_type;
    typedef typename super::const_value_type const_value_type;
    typedef typename super::memory_space_type memory_space_type;
    typedef typename super::index_type index_type;
    typedef typename super::size_type size_type;
    typedef typename super::reference_type reference_type;
    typedef typename super::const_reference_type const_reference_type;

private:

    friend class boost::serialization::access;
    typedef linear_memory<V, M> my_type;
    using super::m_ptr;
    using super::m_size;
    using super::m_allocator;

public:

    /// default-construct empty linear memory
    explicit linear_memory(const boost::shared_ptr<allocator> _allocator = boost::make_shared<default_allocator>()) :
            memory<V, M>(_allocator) {
    }

    /**
     * construct linear memory holding i values
     * @param i number of values
     */
    explicit linear_memory(size_type i, const boost::shared_ptr<allocator> _allocator =
            boost::make_shared<default_allocator>()) :
            memory<V, M>(_allocator) {
        m_size = i;
        alloc();
    }

    /// @return the pointer, transferring ownership to the caller (who must deallocate)
    value_type* release() {
        value_type* ptr = m_ptr;
        m_ptr = NULL;
        return ptr;
    }

    /// resize; deallocates and reallocates if the size changes
    void set_size(size_type s) {
        if (s != this->size()) {
            this->dealloc();
            m_size = s;
            alloc();
        }
    }

    /// allocate m_size values using the allocator
    void alloc() {
        assert(this->m_ptr == NULL);
        if (m_size > 0)
            m_allocator->alloc(reinterpret_cast<void**>(&m_ptr), m_size, sizeof(V), memory_space_type());
    }

    /// assignment: resize if necessary, then copy
    my_type& operator=(const my_type& o) {
        if (this == &o)
            return *this;

        if (this->size() != o.size()) {
            this->dealloc();
            m_size = o.size();
            this->alloc();
        }

        // TODO async copy
        cudaStream_t stream = 0;
        this->copy_from(o, stream);

        return *this;
    }

    /// assignment from linear memory in another memory space: resize if necessary, then copy
    template<class OM>
    my_type& operator=(const linear_memory<V, OM>& o) {
        if (this->size() != o.size()) {
            this->dealloc();
            m_size = o.size();
            this->alloc();
        }

        // TODO async copy
        cudaStream_t stream = 0;
        this->copy_from(o, stream);
        return *this;
    }

    /// copy constructor
    explicit linear_memory(const my_type& o) :
            memory<V, M>(o.m_allocator) {
        operator=(o);
    }

    /// copy constructor from linear memory in another memory space
    template<class OM>
    explicit linear_memory(const linear_memory<V, OM>& o) :
            memory<V, M>(o.m_allocator) {
        operator=(o);
    }

    /// @return a reference to the value at index idx
    reference_type operator[](const index_type& idx) {
        assert(idx >= 0);
        assert((size_type) idx < m_size);
        return reference_type(this->m_ptr + idx);
    }

    /// @return a const reference to the value at index idx
    const_reference_type operator[](const index_type& idx) const {
        assert(idx >= 0);
        assert((size_type) idx < m_size);
        return const_reference_type(this->m_ptr + idx);
    }

    /// destructor: deallocates the memory
    ~linear_memory() {
        this->dealloc();
    }

    /// set strides for this memory, assuming row-major layout (dimensions of extent 1 get stride 0)
    void set_strides(linear_memory<int, host_memory_space>& strides,
            const linear_memory<unsigned int, host_memory_space>& shape, row_major) {
        size_t size = 1;
        for (int i = shape.size() - 1; i >= 0; --i) {
            strides[i] = (shape[i] == 1) ? 0 : size;
            size *= shape[i];
        }
        this->check_size_limit(size);
    }

    /// set strides for this memory, assuming column-major layout (dimensions of extent 1 get stride 0)
    void set_strides(linear_memory<int, host_memory_space>& strides,
            const linear_memory<unsigned int, host_memory_space>& shape, column_major) {
        size_t size = 1;
        for (size_t i = 0; i < shape.size(); ++i) {
            strides[i] = (shape[i] == 1) ? 0 : size;
            size *= shape[i];
        }
        this->check_size_limit(size);
    }

    /// reverse the stored sequence in place (host memory only)
    void reverse() {
        if (IsSame<dev_memory_space, memory_space_type>::Result::value)
            throw std::runtime_error("reverse of dev linear memory not implemented");
        value_type* __first = m_ptr, *__last = m_ptr + this->size();
        while (true)
            if (__first == __last || __first == --__last)
                return;
            else {
                std::iter_swap(__first, __last);
                ++__first;
            }
    }

    /// copy size values from a raw pointer in memory space m
    template<class value_type2, class memory_space>
    void copy_from(const value_type2* src, size_t size, memory_space m, cudaStream_t stream) {
        memory<V, M>::copy_from(src, size, m, stream);
    }

    /// copy from another linear_memory, possibly in another memory space
    template<class V2, class OM>
    void copy_from(const linear_memory<V2, OM>& src, cudaStream_t stream) const {
        detail::copy(m_ptr, src.ptr(), src.size(), M(), OM(), stream);
    }

};

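/*
 * Minimal usage sketch (illustrative, not part of the original header):
 * allocate on host and device, fill, and copy host -> device.
 *
 * @code
 * using namespace cuv;
 * linear_memory<float, host_memory_space> h(100);   // 100 floats on the host
 * linear_memory<float, dev_memory_space>  d(100);   // 100 floats on the device
 * for (int i = 0; i < 100; i++)
 *     h[i] = 1.0f;                                  // operator[] returns a reference_type
 * d.copy_from(h, 0);                                // copy on the default stream
 * @endcode
 */
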
/**
 * two-dimensional memory whose rows may be padded to a pitch
 */
template<class V, class M>
class pitched_memory: public memory<V, M> {

private:
    typedef memory<V, M> super;

public:

    typedef typename super::value_type value_type;
    typedef typename super::const_value_type const_value_type;
    typedef typename super::memory_space_type memory_space_type;
    typedef typename super::index_type index_type;
    typedef typename super::size_type size_type;
    typedef typename super::reference_type reference_type;
    typedef typename super::const_reference_type const_reference_type;

private:
    friend class boost::serialization::access;
    typedef pitched_memory<V, M> my_type;
    size_type m_rows; ///< number of rows
    size_type m_cols; ///< number of columns
    size_type m_pitch; ///< row length including padding, in values
    using super::m_ptr;
    using super::m_size;
    using super::m_allocator;

public:

    /// @return the number of rows
    size_type rows() const {
        return m_rows;
    }

    /// @return the number of columns
    size_type cols() const {
        return m_cols;
    }

    /// @return the pitch (row length including padding, in values)
    size_type pitch() const {
        return m_pitch;
    }

    /// @return the number of allocated values, including padding
    size_type size() const {
        return m_rows * m_pitch;
    }

    /// @return the size of the allocation in bytes
    size_type memsize() const {
        return size() * sizeof(V);
    }

    /// default-construct empty pitched memory
    explicit pitched_memory(const boost::shared_ptr<allocator> _allocator = boost::make_shared<default_allocator>()) :
            memory<V, M>(_allocator), m_rows(0), m_cols(0), m_pitch(0) {
    }

    /**
     * construct pitched memory with i rows and j columns
     */
    explicit pitched_memory(index_type i, index_type j, const boost::shared_ptr<allocator> _allocator =
            boost::make_shared<default_allocator>()) :
            memory<V, M>(_allocator), m_rows(i), m_cols(j), m_pitch(0) {
        alloc();
    }

    /// allocate a pitched 2D block of m_rows x m_cols values
    void alloc() {
        assert(this->m_ptr == NULL);
        size_t pitch;
        m_allocator->alloc2d(reinterpret_cast<void**>(&this->m_ptr), pitch, m_rows, m_cols, sizeof(V),
                memory_space_type());
        assert(this->m_ptr != NULL);
        m_pitch = pitch;
        assert(m_pitch % sizeof(value_type) == 0);
        m_pitch /= sizeof(value_type); // store the pitch in values, not bytes
        m_size = m_rows * m_pitch; // in class memory
    }

    /// @return the pointer, transferring ownership to the caller (who must deallocate)
    value_type* release() {
        value_type* ptr = m_ptr;
        m_ptr = NULL;
        return ptr;
    }

    /// resize to rows x cols; reallocates only if the current allocation is too small
    void set_size(size_type rows, size_type cols) {
        if (cols > m_pitch || rows > m_rows) {
            this->dealloc();
            m_rows = rows;
            m_cols = cols;
            this->alloc();
        } else {
            m_rows = rows;
            m_cols = cols;
        }
    }

    /// assignment: reallocate if necessary, then copy
    my_type& operator=(const my_type& o) {
        if (this == &o)
            return *this;

        if (m_pitch < o.m_cols || m_rows < o.m_rows) {
            this->dealloc();
            m_cols = o.m_cols;
            m_rows = o.m_rows;
            this->alloc();
        }
        m_cols = o.m_cols;
        m_rows = o.m_rows;
        // TODO async copy
        cudaStream_t stream = 0;
        this->copy_from(o, stream);
        return *this;
    }

    /// assignment from pitched memory in another memory space
    template<class OM>
    my_type&
    operator=(const pitched_memory<V, OM>& o) {
        if (m_pitch < o.cols() || m_rows < o.rows()) {
            this->dealloc();
            m_cols = o.cols();
            m_rows = o.rows();
            this->alloc();
        }
        m_cols = o.cols();
        m_rows = o.rows();
        // TODO async copy
        cudaStream_t stream = 0;
        this->copy_from(o, stream);
        return *this;
    }

    /// @return a reference to the value at linear index idx (row-major, skipping padding)
    reference_type operator[](const index_type& idx) {
        assert(idx >= 0);
        index_type row = idx / m_cols;
        index_type col = idx % m_cols;
        assert((size_type) row < m_rows);
        assert((size_type) col < m_cols);
        return reference_type(this->m_ptr + row * m_pitch + col);
    }

    /// const version of the above
    const_reference_type operator[](const index_type& idx) const {
        return const_cast<pitched_memory&>(*this)[idx];
    }

601 
610  assert(i >= 0);
611  assert(j >= 0);
612  assert((size_type) i < m_rows);
613  assert((size_type) j < m_cols);
614  return reference_type(this->m_ptr + i * m_pitch + j);
615  }
618  return const_cast<pitched_memory&>(*this)(i, j);
619  }
620 
    /**
     * set strides for this memory, assuming row-major layout; the last
     * (pitched) dimension gets stride 1 and contributes pitch() to the size
     */
    void set_strides(linear_memory<int, host_memory_space>& strides,
            const linear_memory<unsigned int, host_memory_space>& shape, row_major) {
        size_type size = 1;
        assert(shape.size() >= 2);
        const int pitched_dim = shape.size() - 1;
        for (int i = shape.size() - 1; i >= 0; --i) {
            if (shape[i] == 1) {
                strides[i] = 0;
            } else if (i == pitched_dim) {
                strides[i] = 1;
                size *= pitch();
            } else {
                strides[i] = size;
                size *= shape[i];
            }
        }
    }

    /**
     * set strides for this memory, assuming column-major layout; the first
     * (pitched) dimension gets stride 1 and contributes pitch() to the size
     */
    void set_strides(linear_memory<int, host_memory_space>& strides,
            const linear_memory<unsigned int, host_memory_space>& shape, column_major) {
        size_type size = 1;
        assert(shape.size() >= 2);
        const size_type pitched_dim = 0;
        for (unsigned int i = 0; i < shape.size(); ++i) {
            if (shape[i] == 1) {
                strides[i] = 0;
            } else if (i == pitched_dim) {
                strides[i] = 1;
                size *= pitch();
            } else {
                strides[i] = size;
                size *= shape[i];
            }
        }
    }

    /// 2D copy from another pitched_memory, possibly in another memory space
    template<class V2, class OM>
    void copy2d_from(const pitched_memory<V2, OM>& src, cudaStream_t stream) const {
        // note: m_pitch and src.pitch() are in values, not bytes (see alloc())
        detail::copy2d(m_ptr, src.ptr(), m_pitch, src.pitch(),
                m_rows, m_cols, M(), OM(), stream);
    }

    /// copy from another pitched_memory, possibly in another memory space
    template<class V2, class OM>
    void copy_from(const pitched_memory<V2, OM>& src, cudaStream_t stream) const {
        detail::copy(m_ptr, src.ptr(), src.size(), M(), OM(), stream);
    }

};
// data_structures
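
/*
 * Minimal usage sketch (illustrative, not part of the original header):
 * pitched memory pads each row, so the pitch may exceed the column count.
 *
 * @code
 * using namespace cuv;
 * pitched_memory<float, host_memory_space> pm(3, 100); // 3 rows, 100 columns
 * assert(pm.pitch() >= pm.cols());                     // rows may be padded
 * pm(2, 99) = 1.0f;                                    // element access via operator()
 * @endcode
 */
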
namespace detail {

/// @return whether a (shape, stride) pair describes C-contiguous row-major memory
inline bool is_c_contiguous(row_major, const linear_memory<unsigned int, cuv::host_memory_space>& shape,
        const linear_memory<int, cuv::host_memory_space>& stride) {
    bool c_contiguous = true;
    int size = 1;
    for (int i = shape.size() - 1; (i >= 0) && c_contiguous; --i) {
        if (shape[i] == 1)
            continue;
        if (stride[i] != size)
            c_contiguous = false;
        size = size * shape[i];
    }
    return c_contiguous;
}

/// @return whether a (shape, stride) pair describes contiguous column-major memory
inline bool is_c_contiguous(column_major, const linear_memory<unsigned int, cuv::host_memory_space>& shape,
        const linear_memory<int, cuv::host_memory_space>& stride) {
    bool c_contiguous = true;
    int size = 1;
    for (unsigned int i = 0; i < shape.size() && c_contiguous; ++i) {
        if (shape[i] == 1)
            continue;
        if (stride[i] != size)
            c_contiguous = false;
        size = size * shape[i];
    }
    return c_contiguous;
}

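/*
 * Worked example (illustrative, not part of the original header): for a
 * row-major array of shape (2, 3, 4), the contiguous strides are (12, 4, 1).
 * The row-major check above walks from the last dimension inward, verifying
 * stride[i] == size and then multiplying size by shape[i]: 1*4 = 4, 4*3 = 12.
 */
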
/// @return whether a row-major (shape, stride) pair can be copied with a single 2D memcpy
inline bool is_2dcopyable(row_major, const linear_memory<unsigned int, cuv::host_memory_space>& shape,
        const linear_memory<int, cuv::host_memory_space>& stride) {
    bool c_contiguous = shape.size() > 1;
    int pitched_dim = shape.size() - 1; // last dim
    while (shape[pitched_dim] == 1 && stride[pitched_dim] == 1)
        pitched_dim--;
    int size = 1;
    for (int i = shape.size() - 1; (i >= 0) && c_contiguous; --i) {
        if (shape[i] == 1) {
            continue;
        } else if (i == pitched_dim) {
            size *= stride[i - 1];
        } else if (stride[i] != size) {
            c_contiguous = false;
        } else {
            size *= shape[i];
        }
    }
    return c_contiguous;
}

/// @return whether a column-major (shape, stride) pair can be copied with a single 2D memcpy
inline bool is_2dcopyable(column_major, const linear_memory<unsigned int, cuv::host_memory_space>& shape,
        const linear_memory<int, cuv::host_memory_space>& stride) {
    bool c_contiguous = shape.size() > 1;
    unsigned int pitched_dim = 0;
    while (shape[pitched_dim] == 1 && stride[pitched_dim] == 1)
        pitched_dim++;
    int size = 1;
    for (unsigned int i = 0; (i < shape.size()) && c_contiguous; ++i) {
        if (shape[i] == 1) {
            continue;
        } else if (i == pitched_dim) {
            size *= stride[i];
        } else if (stride[i] != size) {
            c_contiguous = false;
        } else {
            size *= shape[i];
        }
    }
    return c_contiguous;
}

}
}

#endif