// curfil — score.h: split-score functions (information gain variants) used
// when training random decision forests on CPU and GPU.
1 #ifndef CURFIL_SCORE_H
2 #define CURFIL_SCORE_H
3 
4 #include <assert.h>
5 #include <cuv/ndarray.hpp>
6 
7 #include "utils.h"
8 
9 namespace curfil
10 {
11 
12 typedef double ScoreType;
13 
19 
20 protected:
21 
25  __host__ __device__
26  static ScoreType entropy(const ScoreType prob) {
27  if (prob == 0.0) {
28  return 0.0;
29  }
30  return -prob * log2(prob);
31  }
32 
36  __host__ __device__
37  static ScoreType normalizeScore(const ScoreType score) {
38 
39  assert(!isnan(score));
40 
41  if (score > 1.0) {
42  assert(fabs(score - 1) < 1e-6);
43  return 1;
44  }
45 
46  if (score < 0.0) {
47  assert(fabs(score - 0) < 1e-6);
48  return 0;
49  }
50 
51  assertProbability(score);
52  return score;
53  }
54 
55 public:
56 
60  template<class W>
61  __host__ __device__
62  static ScoreType calculateScore(const size_t numLabels, const W* leftClasses, const W* rightClasses,
63  const unsigned int leftRightStride, const W* allClasses, const ScoreType totalLeft,
64  const ScoreType totalRight) {
65 
66  const ScoreType total = totalLeft + totalRight;
67 
68  const ScoreType leftProb = totalLeft / total;
69  const ScoreType rightProb = totalRight / total;
70 
71 #ifndef NDEBUG
72  W totalLeftTest = 0;
73  W totalRightTest = 0;
74 #endif
75 
76  ScoreType score = 0;
77 
78  for (size_t label = 0; label < numLabels; label++) {
79  const size_t offset = label * leftRightStride;
80  const W& leftValue = leftClasses[offset];
81  const W& rightValue = rightClasses[offset];
82 
83 #ifndef NDEBUG
84  assert(leftValue <= allClasses[label]);
85  assert(rightValue <= allClasses[label]);
86  assert(leftValue <= total);
87  assert(rightValue <= total);
88  totalLeftTest += leftValue;
89  totalRightTest += rightValue;
90 #endif
91 
92  const ScoreType classProb = allClasses[label] / total;
93  assertProbability(classProb);
94 
95  if (leftValue > 0) {
96  ScoreType classProbLeft = leftValue / total;
97  assertProbability(classProbLeft);
98  score += classProbLeft * log2(classProbLeft / (leftProb * classProb));
99  }
100 
101  if (rightValue > 0) {
102  ScoreType classProbRight = rightValue / total;
103  assertProbability(classProbRight);
104  score += classProbRight * log2(classProbRight / (rightProb * classProb));
105  }
106  }
107 
108  assert(totalLeftTest == totalLeft);
109  assert(totalRightTest == totalRight);
110 
111  return normalizeScore(score);
112  }
113 };
114 
121 protected:
122 
126  __host__ __device__
127  static ScoreType splitEntropy(const ScoreType total, const ScoreType totalLeft, const ScoreType totalRight) {
128  ScoreType H_s = (entropy(totalLeft) + entropy(totalRight) - entropy(total)) / total;
129 
130  assert(!isnan(H_s));
131  assert(H_s >= 0);
132 
133  return H_s;
134  }
135 
139  template<class W>
140  __host__ __device__
141  static ScoreType classificationEntropy(const size_t numLabels, const W* allClasses, const ScoreType total) {
142 
143  ScoreType H_c = 0;
144 
145  for (size_t label = 0; label < numLabels; label++) {
146  const W& value = allClasses[label];
147  assert(value <= total);
148  if (value > 0) {
149  H_c += entropy(value);
150  }
151  }
152 
153  H_c -= entropy(total);
154  H_c /= total;
155 
156  assert(!isnan(H_c));
157  assert(H_c >= 0);
158 
159  return H_c;
160  }
161 
162 public:
163 
164  template<class W>
165  __host__ __device__
166  static ScoreType calculateScore(const size_t numClasses, const W* leftClasses, const W* rightClasses,
167  const unsigned int leftRightStride, const W* allClasses, const ScoreType totalLeft,
168  const ScoreType totalRight) {
169 
170  // Compute information gain due to split decision
171  const ScoreType informationGain = InformationGainScore::calculateScore(numClasses,
172  leftClasses, rightClasses, leftRightStride,
173  allClasses, totalLeft, totalRight);
174 
175  if (informationGain == 0) {
176  // skip calculation of split entropy
177  return 0;
178  }
179 
180  ScoreType total = totalLeft + totalRight;
181  assert(total > 0);
182 
183  const ScoreType H_s = splitEntropy(total, totalLeft, totalRight);
184  const ScoreType H_c = classificationEntropy(numClasses, allClasses, total);
185 
186  ScoreType score = (2 * informationGain) / (H_s + H_c);
187  return normalizeScore(score);
188  }
189 };
190 
192 class NoOpScore: public InformationGainScore {
193 
194 public:
195  template<class W>
196  __host__ __device__
197  static ScoreType calculateScore(const size_t numLabels, const W* leftClasses, const W* rightClasses,
198  const unsigned int leftRightStride, const W* allClasses, const ScoreType totalLeft,
199  const ScoreType totalRight) {
200 
201  const ScoreType total = totalLeft + totalRight;
202 
203  ScoreType score = 0;
204 
205  for (size_t label = 0; label < numLabels; label++) {
206  const size_t offset = label * leftRightStride;
207  const W& leftValue = leftClasses[offset];
208  const W& rightValue = rightClasses[offset];
209 
210  score += leftValue;
211  score += rightValue;
212  score += allClasses[label];
213  }
214 
215  return score / (3.0 * total);
216  }
217 };
218 
220 
221 }
222 
223 #endif