1 #ifndef CURFIL_RANDOMTREEIMAGE_H
2 #define CURFIL_RANDOMTREEIMAGE_H
6 #include <boost/make_shared.hpp>
7 #include <cuv/ndarray.hpp>
13 #include "random_tree.h"
// ---- Fragments of the XY point class ----
// NOTE(review): interior lines of the original header are missing from this
// extraction; do not assume the omitted code — verify against the full file.
// Copy-constructor initializer list: copies both coordinates.
41 x(other.x), y(other.y) {
// Tail of a helper that builds a new XY from computed coordinates; the
// derivation of newX/newY is not visible here (presumably an offset or
// flip transform — TODO confirm in the full file).
59 return XY(newX, newY);
// operator== body: two points are equal iff both coordinates match.
66 return (x == other.x && y == other.y);
// operator!= body: negation of operator== (statement split across two
// physical lines by the extraction).
73 return !(*
this == other);
// ---- PixelInstance constructors (fragments; interior lines missing) ----
// First constructor: derives this pixel's depth from the image, which must
// already be "integrated" (summed-area form) — see the runtime_error below.
113 PixelInstance(
const RGBDImage* image,
const LabelType& label, uint16_t x, uint16_t y, HorizontalFlipSetting setting = NoFlip) :
114 image(image), label(label), point(x, y), depth(
Depth::INVALID), horFlipSetting(setting) {
115 assert(image != NULL);
// Guard (condition on the missing line 116/117 — presumably
// !image->isIntegrated() or similar; verify in the full file).
118 throw std::runtime_error(
"image is not integrated");
// Inclusion–exclusion on the integrated valid-depth channel recovers the
// single pixel's validity: I(x,y) - I(x-1,y) - I(x,y-1) + I(x-1,y-1).
// Out-of-range neighbours contribute 0.
121 int aboveValid = (y > 0) ? image->
getDepthValid(x, y - 1) : 0;
122 int leftValid = (x > 0) ? image->
getDepthValid(x - 1, y) : 0;
123 int aboveLeftValid = (x > 0 && y > 0) ? image->
getDepthValid(x - 1, y - 1) : 0;
125 int valid = image->
getDepthValid(x, y) - (leftValid + aboveValid - aboveLeftValid);
// A single de-integrated validity cell must be exactly 0 or 1.
126 assert(valid == 0 || valid == 1);
// Same inclusion–exclusion applied to the integrated depth channel; the
// reads of left/above/aboveLeft happen on lines 127-132, which are missing
// from this extraction.
133 depth = image->
getDepth(x, y) - (left + above - aboveLeft);
// Second constructor: caller supplies the depth directly, so no
// de-integration is needed (signature head on line 148 is missing).
149 uint16_t x, uint16_t y, HorizontalFlipSetting setting = NoFlip) :
150 image(image), label(label), point(x, y), depth(depth), horFlipSetting(setting) {
151 assert(image != NULL);
// Accessor bodies: narrow the stored point coordinates back to uint16_t
// (the surrounding getX()/getY() signatures are on missing lines).
181 return static_cast<uint16_t
>(point.
getX());
188 return static_cast<uint16_t
>(point.
getY());
// ---- Region-average over an integrated color channel (fragment; the
// function signature and several interior lines are missing) ----
197 assert(region.
getX() >= 0);
198 assert(region.
getY() >= 0);
// Clamp the half-extent to at least one pixel in each direction.
202 const int width = std::max(1, region.
getX());
203 const int height = std::max(1, region.
getY());
// Box corners around (x, y); upperY/lowerY are computed on missing lines
// (presumably y -/+ height — TODO confirm).
208 int leftX = x -
width;
209 int rightX = x +
width;
// Any corner outside the image makes the response undefined: signal with NaN
// rather than clamping, so callers can treat it as "no feature".
213 if (leftX < 0 || rightX >= image->
getWidth() || upperY < 0 || lowerY >= image->
getHeight()) {
214 return std::numeric_limits<double>::quiet_NaN();
217 assert(inImage(x, y));
219 Point upperLeft(leftX, upperY);
220 Point upperRight(rightX, upperY);
221 Point lowerLeft(leftX, lowerY);
222 Point lowerRight(rightX, lowerY);
// Four summed-area-table corner reads for the requested channel.
224 FeatureResponseType lowerRightPixel = getColor(lowerRight, channel);
225 FeatureResponseType lowerLeftPixel = getColor(lowerLeft, channel);
226 FeatureResponseType upperRightPixel = getColor(upperRight, channel);
227 FeatureResponseType upperLeftPixel = getColor(upperLeft, channel);
// NaN from any corner (e.g. uninitialized color) propagates to the result.
// NOTE(review): unqualified isnan in a header relies on the C macro or ADL;
// std::isnan from <cmath> would be the portable spelling — verify build mode.
229 if (isnan(lowerRightPixel) || isnan(lowerLeftPixel) || isnan(upperRightPixel) || isnan(upperLeftPixel))
230 return std::numeric_limits<double>::quiet_NaN();
// Standard box sum from a summed-area table: LR - UR - LL + UL.
232 FeatureResponseType sum = (lowerRightPixel - upperRightPixel) + (upperLeftPixel - lowerLeftPixel);
// ---- Region-average over the integrated depth channel (fragment; the
// signature and several interior lines, incl. the corner depth reads on
// lines 279-285, are missing) ----
242 assert(region.
getX() >= 0);
243 assert(region.
getY() >= 0);
247 const int width = std::max(1, region.
getX());
248 const int height = std::max(1, region.
getY());
253 int leftX = x -
width;
254 int rightX = x +
width;
// Out-of-bounds box: undefined response, signalled as NaN.
258 if (leftX < 0 || rightX >= image->
getWidth() || upperY < 0 || lowerY >= image->
getHeight()) {
259 return std::numeric_limits<double>::quiet_NaN();
262 assert(inImage(x, y));
264 Point upperLeft(leftX, upperY);
265 Point upperRight(rightX, upperY);
266 Point lowerLeft(leftX, lowerY);
267 Point lowerRight(rightX, lowerY);
// Box sum over the integrated "depth valid" counter gives the number of
// pixels in the region that carry a valid depth measurement.
269 int upperLeftValid = getDepthValid(upperLeft);
270 int upperRightValid = getDepthValid(upperRight);
271 int lowerRightValid = getDepthValid(lowerRight);
272 int lowerLeftValid = getDepthValid(lowerLeft);
274 int numValid = (lowerRightValid - upperRightValid) + (upperLeftValid - lowerLeftValid);
275 assert(numValid >= 0);
// Reached under the condition on missing line 277 (presumably
// numValid == 0 — no valid depth in the region): return NaN.
278 return std::numeric_limits<double>::quiet_NaN();
// Box sum of integer depth values (corner reads are on missing lines).
286 int sum = (lowerRightDepth - upperRightDepth) + (upperLeftDepth - lowerLeftDepth);
// Divide by 1000 — presumably converts fixed-point millimetre depth to
// metres; TODO confirm the depth unit against RGBDImage.
287 FeatureResponseType feat = sum /
static_cast<FeatureResponseType
>(1000);
// Average over only the valid pixels in the region.
288 return (feat / numValid);
// ---- Flip-setting accessors, member, and private helpers (fragments) ----
// Getter body for the horizontal-flip setting.
317 return horFlipSetting;
// Setter body for the horizontal-flip setting.
325 horFlipSetting = setting;
333 HorizontalFlipSetting horFlipSetting;
// getColor: reads one color channel at a position; the branch on missing
// line 336 decides when NaN is returned instead (presumably when the
// position/color is unavailable — TODO confirm).
335 float getColor(
const Point& pos, uint8_t channel)
const {
337 return std::numeric_limits<float>::quiet_NaN();
// getDepth: reads the (integrated) depth value at a position; asserts the
// stored integer value is non-negative.
343 Depth
getDepth(
const Point& pos)
const {
348 const Depth depth = image->
getDepth(pos.getX(), pos.getY());
350 assert(depth.getIntValue() >= 0);
// getDepthValid: valid-depth counter lookup (body on missing lines).
354 int getDepthValid(
const Point& pos)
const {
// inImage(x, y): bounds check (body on missing lines 359-361).
358 bool inImage(
int x,
int y)
const {
// inImage(Point): convenience overload forwarding to the (x, y) version.
362 bool inImage(
const Point& pos)
const {
363 return inImage(pos.getX(), pos.getY());
// ---- ImageFeatureFunction (fragments; ctor head and several interior
// lines are missing) ----
393 const uint8_t channel1,
396 const uint8_t channel2) :
397 featureType(featureType),
// A feature comparing a region against itself is meaningless: reject it.
404 if (offset1 == offset2) {
405 throw std::runtime_error(
"illegal feature: offset1 equals offset2");
// Default constructor: value-initializes all members.
411 featureType(), offset1(), region1(), channel1(), offset2(), region2(), channel2() {
// Sort-key bit packing (presumably for GPU-friendly feature ordering):
//   [31:30] feature type (2 bits)
//   [29:26] channel1 (4 bits)
//   [25:22] channel2 (4 bits)
//   [21:14] offset1.y biased by +127 (8 bits)
//   [13:6]  offset1.x biased by +127 (8 bits)
// NOTE(review): each operand is cast to uint8_t and then integral-promoted
// to (signed) int before the shift; "<< 30" on a nonzero value overflows
// int, which is UB pre-C++14. The packing should be done in an unsigned
// 32-bit type — verify sortKey's declared type in the full file.
419 sortKey |=
static_cast<uint8_t
>(
getType() & 0x03) << 30;
420 sortKey |=
static_cast<uint8_t
>(
getChannel1() & 0x0F) << 26;
421 sortKey |=
static_cast<uint8_t
>(
getChannel2() & 0x0F) << 22;
422 sortKey |=
static_cast<uint8_t
>((
getOffset1().
getY() + 127) & 0xFF) << 14;
423 sortKey |=
static_cast<uint8_t
>((
getOffset1().
getX() + 127) & 0xFF) << 6;
// Dispatch on the feature type (case labels are on missing lines); an
// unrecognized type is a programming error.
438 switch (featureType) {
444 throw std::runtime_error(
"unknown feature");
// Validity mirrors the constructor check: distinct offsets required.
452 return (offset1 != offset2);
// Feature-response dispatch: depth vs. color feature (case labels on
// missing lines — presumably DEPTH and COLOR; TODO confirm).
460 switch (featureType) {
462 return calculateDepthFeature(instance, flipRegion);
464 return calculateColorFeature(instance, flipRegion);
// operator!= body: negation of operator== (split across two lines).
518 return !(*
this == other);
527 FeatureType featureType;
// calculateColorFeature: response a - b from two offset color regions
// (most of the body, incl. the averageRegionColor calls, is on missing
// lines). Returns NaN early — presumably when the instance depth used for
// normalization is invalid; TODO confirm.
537 FeatureResponseType calculateColorFeature(
const PixelInstance& instance,
bool flipRegion)
const {
541 return std::numeric_limits<double>::quiet_NaN();
544 FeatureResponseType a;
554 FeatureResponseType b;
// calculateDepthFeature: response a - b from two offset depth regions,
// with offsets/regions scaled by the instance depth (depth-invariant
// features). The visible calls are the flipRegion branch: the offset's x
// coordinate is negated to mirror the probe horizontally.
567 FeatureResponseType calculateDepthFeature(
const PixelInstance& instance,
bool flipRegion)
const {
569 const Depth depth = instance.getDepth();
570 if (!depth.isValid()) {
571 return std::numeric_limits<double>::quiet_NaN();
574 FeatureResponseType a;
576 a = instance.averageRegionDepth(Offset(-offset1.
getX(),offset1.
getY()).normalize(depth), region1.
normalize(depth));
583 FeatureResponseType b;
585 b = instance.averageRegionDepth(Offset(-offset2.
getX(),offset2.
getY()).normalize(depth), region2.
normalize(depth));
// ---- Templated feature-storage accessors (fragments) ----
// The features live in a 2-D cuv::ndarray m_features with one row per
// feature attribute; each accessor slices out one full row. Two parallel
// sets of 11 accessors are visible — presumably const and non-const
// overloads of the same getters; TODO confirm. Row meanings (types,
// offsets, regions, channels, thresholds, ...) are declared on missing
// lines.
608 template<
class memory_space>
629 boost::shared_ptr<cuv::allocator> allocator) :
636 template<
class other_memory_space>
644 template<
class other_memory_space>
676 return m_features[cuv::indices[0][cuv::index_range()]];
683 return m_features[cuv::indices[1][cuv::index_range()]];
690 return m_features[cuv::indices[2][cuv::index_range()]];
697 return m_features[cuv::indices[3][cuv::index_range()]];
704 return m_features[cuv::indices[4][cuv::index_range()]];
711 return m_features[cuv::indices[5][cuv::index_range()]];
718 return m_features[cuv::indices[6][cuv::index_range()]];
725 return m_features[cuv::indices[7][cuv::index_range()]];
732 return m_features[cuv::indices[8][cuv::index_range()]];
739 return m_features[cuv::indices[9][cuv::index_range()]];
746 return m_features[cuv::indices[10][cuv::index_range()]];
760 return m_features[cuv::indices[0][cuv::index_range()]];
767 return m_features[cuv::indices[1][cuv::index_range()]];
774 return m_features[cuv::indices[2][cuv::index_range()]];
781 return m_features[cuv::indices[3][cuv::index_range()]];
788 return m_features[cuv::indices[4][cuv::index_range()]];
795 return m_features[cuv::indices[5][cuv::index_range()]];
802 return m_features[cuv::indices[6][cuv::index_range()]];
809 return m_features[cuv::indices[7][cuv::index_range()]];
816 return m_features[cuv::indices[8][cuv::index_range()]];
823 return m_features[cuv::indices[9][cuv::index_range()]];
830 return m_features[cuv::indices[10][cuv::index_range()]];
// ---- Samples<memory_space> (fragments) ----
// Stores all per-sample attributes in one 6 x numSamples ndarray "data" and
// exposes typed raw pointers into its rows:
//   row 0: depths (float)   row 1: sampleX (int)   row 2: sampleY (int)
//   row 3: imageNumbers (int)   row 4: labels (uint8_t)
//   row 5: horFlipSetting (HorizontalFlipSetting)
// NOTE(review): the reinterpret_casts assume every row's element type has
// the same size as the ndarray's element type and that rows are contiguous;
// the assert_equals at the end checks exactly this layout for the labels
// row. Verify the ndarray value_type in the full file.
895 template<
class memory_space>
// Copy/conversion constructor #1 (head on missing lines): rebinds the row
// pointers into this object's own data array.
913 depths(reinterpret_cast<float*>(
data[cuv::indices[0][cuv::index_range()]].ptr())),
914 sampleX(reinterpret_cast<int*>(
data[cuv::indices[1][cuv::index_range()]].ptr())),
915 sampleY(reinterpret_cast<int*>(
data[cuv::indices[2][cuv::index_range()]].ptr())),
916 imageNumbers(reinterpret_cast<int*>(
data[cuv::indices[3][cuv::index_range()]].ptr())),
917 labels(reinterpret_cast<uint8_t*>(
data[cuv::indices[4][cuv::index_range()]].ptr())),
918 horFlipSetting(reinterpret_cast<HorizontalFlipSetting*>(
data[cuv::indices[5][cuv::index_range()]].ptr())){
// Copy/conversion constructor #2 (head on missing lines): identical row
// pointer setup — presumably the cross-memory-space conversion overload.
927 depths(reinterpret_cast<float*>(
data[cuv::indices[0][cuv::index_range()]].ptr())),
928 sampleX(reinterpret_cast<int*>(
data[cuv::indices[1][cuv::index_range()]].ptr())),
929 sampleY(reinterpret_cast<int*>(
data[cuv::indices[2][cuv::index_range()]].ptr())),
930 imageNumbers(reinterpret_cast<int*>(
data[cuv::indices[3][cuv::index_range()]].ptr())),
931 labels(reinterpret_cast<uint8_t*>(
data[cuv::indices[4][cuv::index_range()]].ptr())),
932 horFlipSetting(reinterpret_cast<HorizontalFlipSetting*>(
data[cuv::indices[5][cuv::index_range()]].ptr())){
// Allocating constructor: creates the 6 x numSamples backing array itself.
938 Samples(
size_t numSamples, boost::shared_ptr<cuv::allocator>& allocator) :
939 data(6, numSamples, allocator),
940 depths(reinterpret_cast<float*>(
data[cuv::indices[0][cuv::index_range()]].ptr())),
941 sampleX(reinterpret_cast<int*>(
data[cuv::indices[1][cuv::index_range()]].ptr())),
942 sampleY(reinterpret_cast<int*>(
data[cuv::indices[2][cuv::index_range()]].ptr())),
943 imageNumbers(reinterpret_cast<int*>(
data[cuv::indices[3][cuv::index_range()]].ptr())),
944 labels(reinterpret_cast<uint8_t*>(
data[cuv::indices[4][cuv::index_range()]].ptr())),
945 horFlipSetting(reinterpret_cast<HorizontalFlipSetting*>(
data[cuv::indices[5][cuv::index_range()]].ptr()))
// Layout sanity check: the labels row must start exactly 4 rows past the
// base pointer (contiguous row-major storage).
948 assert_equals(
labels, reinterpret_cast<uint8_t*>(
data.
ptr() + 4 * numSamples));
// ---- GPU-side tree context: constructor initializer list + members
// (fragments; the constructor head and class name are on missing lines) ----
// One named pooled CUDA allocator per buffer category, so device memory for
// samples, features, sort keys, scores, counters, and feature responses is
// pooled and reused separately across training iterations.
970 treeId(treeId), configuration(configuration),
971 imageWidth(0), imageHeight(0),
972 sampleDataAllocator(boost::make_shared<cuv::pooled_cuda_allocator>(
"sampleData")),
973 featuresAllocator(boost::make_shared<cuv::pooled_cuda_allocator>(
"feature")),
974 keysIndicesAllocator(boost::make_shared<cuv::pooled_cuda_allocator>(
"keysIndices")),
975 scoresAllocator(boost::make_shared<cuv::pooled_cuda_allocator>(
"scores")),
976 countersAllocator(boost::make_shared<cuv::pooled_cuda_allocator>(
"counters")),
977 featureResponsesAllocator(boost::make_shared<cuv::pooled_cuda_allocator>(
"featureResponses")) {
// Method declarations (heads on missing lines): per-node sample grouping,
// sample preparation, batching, device selection, feature upload.
989 std::vector<const PixelInstance*> > >& samplesPerNode);
994 std::vector<std::vector<const PixelInstance*> >
prepare(
const std::vector<const PixelInstance*>& samples,
1000 std::vector<std::vector<const PixelInstance*> >
prepare(
const std::vector<const PixelInstance*>& samples,
1007 const std::vector<const PixelInstance*>& batches,
1014 const std::vector<const PixelInstance*>& batches,
1020 template<
class memory_space>
1027 template<
class memory_space>
1030 const std::vector<std::vector<const PixelInstance*> >& batches,
1037 template<
class memory_space>
1045 void selectDevice();
1049 void copyFeaturesToDevice();
1052 cudaStream_t stream);
1055 const std::vector<const PixelInstance*>&)
const;
// Immutable identity/configuration and cached image dimensions.
1057 const size_t treeId;
1060 unsigned int imageWidth;
1061 unsigned int imageHeight;
// Allocator handles initialized above (held as the cuv::allocator base).
1063 boost::shared_ptr<cuv::allocator> sampleDataAllocator;
1064 boost::shared_ptr<cuv::allocator> featuresAllocator;
1065 boost::shared_ptr<cuv::allocator> keysIndicesAllocator;
1066 boost::shared_ptr<cuv::allocator> scoresAllocator;
1067 boost::shared_ptr<cuv::allocator> countersAllocator;
1068 boost::shared_ptr<cuv::allocator> featureResponsesAllocator;
// ---- Training-side API (fragments; enclosing class head is missing) ----
// train: fits one tree on the labeled RGB-D images, drawing subsampleCount
// training pixels per the configured sampling strategy.
1096 void train(
const std::vector<LabeledRGBDImage>& trainLabelImages,
1097 RandomSource& randomSource,
size_t subsampleCount,
size_t numLabels);
// Accessor for the trained tree (body on missing lines).
1114 const boost::shared_ptr<RandomTree<PixelInstance, ImageFeatureFunction> >&
getTree()
const {
// Accessor body: per-class prior label distribution computed from the
// training images.
1122 return classLabelPriorDistribution;
// Internal training entry point operating on the drawn pixel subsample.
1139 void doTrain(
RandomSource& randomSource,
size_t numClasses,
1140 std::vector<const PixelInstance*>& subsamples);
1142 bool finishedTraining;
1147 boost::shared_ptr<RandomTree<PixelInstance, ImageFeatureFunction> > tree;
// Computes classLabelPriorDistribution from the training set.
1151 void calculateLabelPriorDistribution(
const std::vector<LabeledRGBDImage>& trainLabelImages);
// Two subsampling strategies: uniform over all pixels vs. uniform per class
// (balancing rare labels).
1153 std::vector<PixelInstance> subsampleTrainingDataPixelUniform(
1154 const std::vector<LabeledRGBDImage>& trainLabelImages,
1155 RandomSource& randomSource,
size_t subsampleCount)
const;
1157 std::vector<PixelInstance> subsampleTrainingDataClassUniform(
1158 const std::vector<LabeledRGBDImage>& trainLabelImages,
1159 RandomSource& randomSource,
size_t subsampleCount)
const;
// Stream-output declaration for curfil::XY (definition lives in the .cpp).
1169 std::ostream& operator<<(std::ostream& os,
const curfil::XY& xy);