5 #include <cuv/ndarray.hpp>
12 typedef double ScoreType;
// Entropy contribution of a single probability value.
//
// @param prob  probability whose contribution is computed
// @return      -prob * log2(prob)
//
// NOTE(review): this excerpt appears to skip lines between the signature and
// the return statement. Judging only by the visible expression, prob == 0
// would evaluate to NaN (0 * -inf) — confirm that a zero guard exists on the
// lines not shown here.
26 static ScoreType
entropy(
const ScoreType prob) {
30 return -prob * log2(prob);
// Sanity checks on a `score` value. The enclosing function header and the
// control flow around these asserts are not visible in this excerpt.

// The score must be a real number.
39 assert(!isnan(score));
// The score lies within 1e-6 of 1.
// NOTE(review): presumably only reached when the score overshoots 1 by
// rounding error — confirm against the branch that is not shown.
42 assert(fabs(score - 1) < 1e-6);
// The score lies within 1e-6 of 0.
// NOTE(review): presumably only reached when the score undershoots 0 by
// rounding error — confirm against the branch that is not shown.
47 assert(fabs(score - 0) < 1e-6);
// Delegates the final range check to assertProbability (defined elsewhere).
51 assertProbability(score);
// Scores a left/right split from per-label counts.
//
// For each label the visible code adds terms of the form
//   p * log2(p / (sideProb * classProb))
// to `score`, where p is the label's count on one side divided by the
// overall total, sideProb is that side's share of the total, and classProb
// is the label's share of the total.
//
// @param numLabels        number of labels iterated over
// @param leftClasses      per-label counts on the left side, read at
//                         index label * leftRightStride
// @param rightClasses     per-label counts on the right side, read at
//                         index label * leftRightStride
// @param leftRightStride  element stride between consecutive labels in
//                         leftClasses / rightClasses
// @param allClasses       per-label counts over both sides, read at index label
// @param totalLeft        total count on the left side
// @param totalRight       total count on the right side
//
// NOTE(review): the declaration of W (presumably a template parameter), the
// declarations of `score`, `totalLeftTest` and `totalRightTest`, and the
// return statement are not visible in this excerpt.
62 static ScoreType
calculateScore(
const size_t numLabels,
const W* leftClasses,
const W* rightClasses,
63 const unsigned int leftRightStride,
const W* allClasses,
const ScoreType totalLeft,
64 const ScoreType totalRight) {
66 const ScoreType total = totalLeft + totalRight;
// Share of the total that falls on each side of the split.
68 const ScoreType leftProb = totalLeft / total;
69 const ScoreType rightProb = totalRight / total;
78 for (
size_t label = 0; label < numLabels; label++) {
// Left/right arrays are strided; allClasses is indexed densely by label.
79 const size_t offset = label * leftRightStride;
80 const W& leftValue = leftClasses[offset];
81 const W& rightValue = rightClasses[offset];
// A side's count for a label cannot exceed that label's overall count
// nor the overall total.
84 assert(leftValue <= allClasses[label]);
85 assert(rightValue <= allClasses[label]);
86 assert(leftValue <= total);
87 assert(rightValue <= total);
// Accumulate per-side sums for the consistency asserts after the loop.
88 totalLeftTest += leftValue;
89 totalRightTest += rightValue;
// Share of the total that carries this label.
92 const ScoreType classProb = allClasses[label] / total;
93 assertProbability(classProb);
// Left-side term.
// NOTE(review): presumably guarded by a leftValue > 0 check on a line not
// shown, mirroring the right-side guard below — confirm.
96 ScoreType classProbLeft = leftValue / total;
97 assertProbability(classProbLeft);
98 score += classProbLeft * log2(classProbLeft / (leftProb * classProb));
// Right-side term; skipped for empty counts so log2 is never taken of 0.
101 if (rightValue > 0) {
102 ScoreType classProbRight = rightValue / total;
103 assertProbability(classProbRight);
104 score += classProbRight * log2(classProbRight / (rightProb * classProb));
// The per-label counts must add up to the totals passed in by the caller.
108 assert(totalLeftTest == totalLeft);
109 assert(totalRightTest == totalRight);
// Takes the overall total and the left/right totals of a split.
// NOTE(review): only the signature is visible in this excerpt; going by the
// name, this presumably returns the entropy of the left/right partition —
// confirm against the body.
//
// @param total       overall total
// @param totalLeft   total on the left side
// @param totalRight  total on the right side
127 static ScoreType
splitEntropy(
const ScoreType total,
const ScoreType totalLeft,
const ScoreType totalRight) {
// Walks every label and reads its overall count from allClasses.
// NOTE(review): `numLabels` and `allClasses` are not parameters of the
// splitEntropy signature above, so this loop presumably belongs to a
// different function whose header is not visible in this excerpt.
145 for (
size_t label = 0; label < numLabels; label++) {
146 const W& value = allClasses[label];
// A single label's count cannot exceed the overall total.
147 assert(value <= total);
// Scores a left/right split as 2 * informationGain / (H_s + H_c).
//
// H_s is the result of splitEntropy(total, totalLeft, totalRight).
// NOTE(review): the definition of H_c, the callee that produces
// `informationGain`, the body of the `informationGain == 0` branch and the
// return statement are not visible in this excerpt.
//
// @param numClasses       number of classes (not used in the visible lines)
// @param leftClasses      per-class counts on the left side (forwarded)
// @param rightClasses     per-class counts on the right side (forwarded)
// @param leftRightStride  stride for leftClasses / rightClasses (forwarded)
// @param allClasses       per-class counts over both sides (forwarded)
// @param totalLeft        total count on the left side
// @param totalRight       total count on the right side
166 static ScoreType
calculateScore(
const size_t numClasses,
const W* leftClasses,
const W* rightClasses,
167 const unsigned int leftRightStride,
const W* allClasses,
const ScoreType totalLeft,
168 const ScoreType totalRight) {
// Trailing arguments of a call whose callee is on a line not shown here.
172 leftClasses, rightClasses, leftRightStride,
173 allClasses, totalLeft, totalRight);
// Special-cases a gain of exactly zero (branch body not visible).
175 if (informationGain == 0) {
180 ScoreType total = totalLeft + totalRight;
183 const ScoreType H_s =
splitEntropy(total, totalLeft, totalRight);
// Twice the gain divided by the sum of the two entropy terms.
186 ScoreType score = (2 * informationGain) / (H_s + H_c);
// Score class derived from InformationGainScore. Its calculateScore sums
// the per-label entries of allClasses and returns that sum divided by
// (3.0 * total), where total = totalLeft + totalRight.
// NOTE(review): the declaration of `score`, the declaration of W, and the
// closing of the class are not visible in this excerpt.
192 class NoOpScore:
public InformationGainScore {
// @param numLabels        number of labels iterated over
// @param leftClasses      per-label counts on the left side, read at
//                         index label * leftRightStride
// @param rightClasses     per-label counts on the right side, read at
//                         index label * leftRightStride
// @param leftRightStride  element stride between consecutive labels
// @param allClasses       per-label values summed into the score
// @param totalLeft        total count on the left side
// @param totalRight       total count on the right side
// @return                 score / (3.0 * total)
197 static ScoreType calculateScore(
const size_t numLabels,
const W* leftClasses,
const W* rightClasses,
198 const unsigned int leftRightStride,
const W* allClasses,
const ScoreType totalLeft,
199 const ScoreType totalRight) {
201 const ScoreType total = totalLeft + totalRight;
205 for (
size_t label = 0; label < numLabels; label++) {
206 const size_t offset = label * leftRightStride;
// NOTE(review): leftValue and rightValue are bound here but not used in the
// visible lines — confirm whether the lines not shown consume them.
207 const W& leftValue = leftClasses[offset];
208 const W& rightValue = rightClasses[offset];
212 score += allClasses[label];
215 return score / (3.0 * total);