curfil  ..
 All Classes Functions Variables Typedefs Friends Groups Pages
random_tree_image_gpu.h
1 #ifndef CURFIL_RANDOM_TREE_IMAGE_GPU_H
2 #define CURFIL_RANDOM_TREE_IMAGE_GPU_H
3 
4 #include <cuda_runtime_api.h>
5 #include <limits.h>
6 #include <map>
7 #include <set>
8 #include <vector_types.h>
9 #include <vector>
10 
11 #include "image.h"
12 #include "random_tree_image.h"
13 
14 namespace curfil {
15 
// Number of channels: 3 for color data and 2 for depth data.
16 static const int colorChannels = 3;
17 static const int depthChannels = 2;
18 
// Channel indices 0 and 1.
// NOTE(review): presumably these select the depth value and its validity flag
// among the two depth channels — confirm against the code that reads them.
19 static const int depthChannel = 0;
20 static const int depthValidChannel = 1;
21 
// Size constants for the tree layout: 2048 nodes per layer and 16 layers per tree.
// NOTE(review): presumably these bound the device-side tree storage — confirm in the implementation.
22 static const unsigned int NODES_PER_TREE_LAYER = 2048;
23 static const unsigned int LAYERS_PER_TREE = 16;
24 
// Node storage for one tree, constructed from a
// RandomTree<PixelInstance, ImageFeatureFunction>. Exposes the tree id, the
// number of nodes, the number of labels, the size of one node record and the
// underlying data (m_data).
// NOTE(review): this listing has lost some original lines (e.g. the accessor
// signatures and the m_data declaration); see the notes below.
29 class TreeNodes {
30 
31 public:
32 
    // Copy constructor (declared only; the definition is not in this header).
36  TreeNodes(const TreeNodes& other);
37 
    // Constructs the node storage from the given tree.
    // NOTE(review): presumably delegates to convert() below — confirm in the implementation.
41  TreeNodes(const boost::shared_ptr<const RandomTree<PixelInstance, ImageFeatureFunction> >& tree);
42 
    // Returns the stored tree id.
46  size_t getTreeId() const {
47  return m_treeId;
48  }
49 
    // Returns the stored number of nodes.
53  size_t numNodes() const {
54  return m_numNodes;
55  }
56 
    // Returns the stored number of labels.
60  size_t numLabels() const {
61  return m_numLabels;
62  }
63 
    // Returns the stored size of a single node record.
    // NOTE(review): presumably in bytes, matching the offsets below — confirm.
67  size_t sizePerNode() const {
68  return m_sizePerNode;
69  }
70 
    // NOTE(review): the signature lines of the next two accessors are missing
    // from this listing; only their bodies, which both return m_data, are visible.
75  return m_data;
76  }
77 
82  return m_data;
83  }
84 
85 private:
86 
    // Offsets of the fields inside one node record. The expressions evaluate to
    // 0 (left node), 4 (types), 8 (features), 16 (channels), 20 (threshold)
    // and 24 (histograms).
    // NOTE(review): presumably byte offsets passed to setValue() — confirm.
87  static const size_t offsetLeftNode = 0;
88  static const size_t offsetTypes = 4;
89  static const size_t offsetFeatures = offsetTypes + 4;
90  static const size_t offsetChannels = offsetFeatures + 8;
91  static const size_t offsetThreshold = offsetChannels + 4;
92  static const size_t offsetHistograms = offsetThreshold + 4;
93 
    // Values returned by the accessors above.
    // NOTE(review): m_data is returned above but its declaration is not visible
    // in this listing (an original line is missing after m_sizePerNode).
94  size_t m_treeId;
95  size_t m_numNodes;
96  size_t m_numLabels;
97  size_t m_sizePerNode;
99 
    // Generic writer for a value of type T at (node, offset); declared only.
100  template<class T>
101  void setValue(size_t node, size_t offset, const T& value);
102 
    // One private setter per node field.
    // NOTE(review): presumably each one writes through setValue() at the
    // matching offset constant — confirm in the implementation.
103  void setLeftNodeOffset(size_t node, int offset);
104  void setThreshold(size_t node, float threshold);
105  void setHistogramValue(size_t node, size_t label, float value);
106  void setType(size_t node, int8_t value);
107  void setOffset1X(size_t node, int8_t value);
108  void setOffset1Y(size_t node, int8_t value);
109  void setRegion1X(size_t node, int8_t value);
110  void setRegion1Y(size_t node, int8_t value);
111  void setOffset2X(size_t node, int8_t value);
112  void setOffset2Y(size_t node, int8_t value);
113  void setRegion2X(size_t node, int8_t value);
114  void setRegion2Y(size_t node, int8_t value);
115  void setChannel1(size_t node, uint16_t value);
116  void setChannel2(size_t node, uint16_t value);
117 
    // Converts the given tree into this object's storage; declared only.
118  void convert(const boost::shared_ptr<const RandomTree<PixelInstance, ImageFeatureFunction> >& tree);
119 
    // Assignment operator is declared private, so it is not usable from outside the class.
120  TreeNodes& operator=(const TreeNodes& other);
121 
122 };
123 
// Abstract base class for a cache of elements identified by their pointer.
// Subclasses must implement binding, array allocation/freeing and the transfer
// of a single element (all pure virtual below). Copying and assignment are
// declared private.
128 class DeviceCache {
129 
130 public:
131 
132  virtual ~DeviceCache();
133 
    // Returns a mutable reference to the map from element pointer to a size_t id.
137  std::map<const void*, size_t>& getIdMap() {
138  return elementIdMap;
139  }
140 
    // Queries whether the given element is in the cache; declared only.
144  bool containsElement(const void* element) const;
145 
    // Returns the position of the given element; declared only.
    // NOTE(review): behaviour for an element that is not cached is not visible here.
150  size_t getElementPos(const void* element) const;
151 
155  void clear();
156 
    // NOTE(review): the signature line of this accessor is missing from the
    // listing; its body returns totalTransferTimeMicroseconds.
161  return totalTransferTimeMicroseconds;
162  }
163 
164 private:
    // Declared private: the cache is not copyable or assignable from outside.
165  DeviceCache(const DeviceCache& other);
166  DeviceCache& operator=(const DeviceCache& other);
167 
168 protected:
169 
    // Only subclasses can construct the cache. Starts with cache size 0, empty
    // maps, time 0, not bound, and zero accumulated transfer time.
170  DeviceCache() :
171  cacheSize(0), elementIdMap(), elementTimes(), currentTime(0), bound(false), totalTransferTimeMicroseconds(0) {
172  }
173 
    // Returns the current value of the bound flag.
177  bool isBound() const {
178  return bound;
179  }
180 
184  void setBound(bool bound);
185 
189  void updateCacheSize(size_t cacheSize);
190 
    // Hooks that every concrete cache must provide.
191  virtual void bind() = 0;
192  virtual void unbind() = 0;
194  virtual void allocArray() = 0;
195  virtual void freeArray() = 0;
    // Returns the stored cache size.
200  size_t getCacheSize() const {
201  return cacheSize;
202  }
203 
    // Copies the given set of elements for the given cache size; declared only.
    // NOTE(review): presumably calls transferElement() per element — confirm.
207  void copyElements(size_t cacheSize, const std::set<const void*>& elements);
208 
    // Transfers one element to position pos using the given CUDA stream.
212  virtual void transferElement(size_t pos, const void* element, cudaStream_t stream) = 0;
213 
214  // for logging
215  virtual std::string getElementName(const void* element) const = 0;
216  virtual std::string getElementsName() const = 0;
218 private:
219 
220  size_t cacheSize;
    // Maps an element pointer to a size_t (exposed through getIdMap()).
221  std::map<const void*, size_t> elementIdMap;
    // NOTE(review): presumably maps a cache position to the time of its last
    // use, for choosing what to replace — confirm in the implementation.
222  std::map<size_t, size_t> elementTimes;
223 
224  // poor man’s vector clock
225  size_t currentTime;
226 
227  bool bound;
228 
229  size_t totalTransferTimeMicroseconds;
230 
231 };
232 
// DeviceCache specialisation for images. Overrides all of the base class's
// pure virtual hooks and holds two cudaArray pointers, one for color and one
// for depth data. Copying and assignment are declared private.
237 class ImageCache: public DeviceCache {
238 
239 public:
240 
241  ImageCache();
242 
243  virtual ~ImageCache();
244 
    // Copies the given set of images for the given cache size; declared only.
248  void copyImages(size_t imageCacheSize, const std::set<const RGBDImage*>& images);
249 
    // Overload taking pixel samples instead of images.
    // NOTE(review): presumably collects the images referenced by the samples
    // and forwards to the overload above — confirm in the implementation.
253  void copyImages(size_t imageCacheSize, const std::vector<const PixelInstance*>& samples);
254 
255 private:
    // Declared private: not copyable or assignable from outside.
256  ImageCache(const ImageCache& other);
257  ImageCache& operator=(const ImageCache& other);
258 
259 protected:
260 
    // Implementations of the DeviceCache hooks.
261  virtual void bind();
262  virtual void unbind();
263 
264  virtual void allocArray();
265  virtual void freeArray();
266 
267  virtual void transferElement(size_t pos, const void* element, cudaStream_t stream);
268  virtual std::string getElementName(const void* element) const;
269  virtual std::string getElementsName() const;
270 
271 private:
272 
    // NOTE(review): presumably the dimensions of the cached images — confirm.
273  int width;
274  int height;
275 
    // CUDA arrays for the color and depth data.
276  cudaArray* colorTextureData;
277  cudaArray* depthTextureData;
278 
279 };
280 
// DeviceCache specialisation for TreeNodes objects. Overrides all of the base
// class's pure virtual hooks and holds one cudaArray pointer for the tree
// data. Copying and assignment are declared private.
285 class TreeCache: public DeviceCache {
286 
287 public:
288 
289  TreeCache();
290 
291  virtual ~TreeCache();
292 
    // Copies a single tree for the given cache size; declared only.
296  void copyTree(size_t cacheSize, const TreeNodes* tree);
297 
    // Copies a set of trees for the given cache size; declared only.
301  void copyTrees(size_t cacheSize, const std::set<const TreeNodes*>& trees);
302 
303 private:
    // Declared private: not copyable or assignable from outside.
304  TreeCache(const TreeCache& other);
305  TreeCache& operator=(const TreeCache& other);
306 
307 protected:
308 
    // Implementations of the DeviceCache hooks.
309  virtual void transferElement(size_t elementPos, const void* element, cudaStream_t stream);
310  virtual std::string getElementName(const void* element) const;
311  virtual std::string getElementsName() const;
312 
313  virtual void bind();
314  virtual void unbind();
315 
316  virtual void freeArray();
317  virtual void allocArray();
318 
319 private:
320 
    // NOTE(review): presumably taken from the cached TreeNodes
    // (sizePerNode() / numLabels()) — confirm in the implementation.
321  size_t sizePerNode;
322  LabelType numLabels;
323 
    // CUDA array for the tree data.
324  cudaArray* treeTextureData;
325 };
326 
// Forward declaration; the type is only used through boost::shared_ptr in the declarations below.
327 class RandomTreeImage;
328 
334 
// NOTE(review): the line that opens this type is missing from the listing.
// getTreeNode() below returns TreeNodeData, so presumably these are its
// members — confirm against the original header.
335 public:
    // Public fields describing one tree node: a type, two (offset, region)
    // pairs with X/Y components, two channels and a threshold.
    // NOTE(review): `type` is int here while TreeNodes::setType takes int8_t,
    // and the channels are uint8_t here while TreeNodes::setChannel1/2 take
    // uint16_t — check whether the width difference is intended.
337  int type;
338  int8_t offset1X;
339  int8_t offset1Y;
340  int8_t region1X;
341  int8_t region1Y;
342  int8_t offset2X;
343  int8_t offset2Y;
344  int8_t region2X;
345  int8_t region2Y;
346  uint8_t channel1;
347  uint8_t channel2;
348  float threshold;
350 };
351 
// Returns the TreeNodeData for node number nodeNr of the given tree data; declared only.
// NOTE(review): behaviour for an out-of-range nodeNr is not visible here.
355 TreeNodeData getTreeNode(const int nodeNr, const boost::shared_ptr<const TreeNodes>& treeData);
356 
// Produces a shared, immutable TreeNodes object from the given RandomTreeImage; declared only.
361 boost::shared_ptr<const TreeNodes> convertTree(const boost::shared_ptr<const RandomTreeImage>& randomTreeImage);
362 
// Normalizes the given float array, whose type uses cuv::dev_memory_space; declared only.
// NOTE(review): the non-const reference and void return suggest the array is
// modified in place — confirm in the implementation.
367 void normalizeProbabilities(cuv::ndarray<float, cuv::dev_memory_space>& probabilities);
368 
// NOTE(review): this declaration is truncated in the listing; the remaining
// parameter(s) after `probabilities` are not visible and must be taken from the original header.
373 void determineMaxProbabilities(const cuv::ndarray<float, cuv::dev_memory_space>& probabilities,
375 
// Classifies one RGB-D image with the given tree data and writes the result
// into `output`; declared only.
//   treeCacheSize  - cache size for trees (NOTE(review): presumably forwarded to a TreeCache — confirm)
//   output         - float array receiving the result
//   image          - the image to classify
//   numLabels      - number of labels
//   treeData       - the tree to evaluate
//   useDepthImages - defaults to true
380 void classifyImage(int treeCacheSize, cuv::ndarray<float, cuv::dev_memory_space>& output, const RGBDImage& image,
381  LabelType numLabels, const boost::shared_ptr<const TreeNodes>& treeData, bool useDepthImages = true);
382 
383 
384 // for the unit test
// NOTE(review): presumably clears a module-level ImageCache instance that is
// not visible in this header — confirm in the implementation.
385 void clearImageCache();
386 
387 }
388 
389 #endif