hoeffding_tree_model.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_METHODS_HOEFFDING_TREE_HOEFFDING_TREE_MODEL_HPP
13 #define MLPACK_METHODS_HOEFFDING_TREE_HOEFFDING_TREE_MODEL_HPP
14 
15 #include "hoeffding_tree.hpp"
16 #include "binary_numeric_split.hpp"
17 #include "information_gain.hpp"
18 
19 namespace mlpack {
20 namespace tree {
21 
28 {
29  public:
31  enum TreeType
32  {
37  };
38 
43  typedef HoeffdingTree<GiniImpurity, BinaryDoubleNumericSplit,
49  typedef HoeffdingTree<InformationGain, BinaryDoubleNumericSplit,
51 
60 
67 
74 
81 
88 
93 
// Train the model on the given dataset with the given labels.
// NOTE(review): the trailing parameters (successProbability, maxSamples,
// checkInterval, minSamples, bins, observationsBeforeBinning) are presumably
// forwarded to the underlying HoeffdingTree / numeric split -- confirm against
// the implementation file, which is not part of this listing.
114  void BuildModel(const arma::mat& dataset,
115  const data::DatasetInfo& datasetInfo,
116  const arma::Row<size_t>& labels,
117  const size_t numClasses,
118  const bool batchTraining,
119  const double successProbability,
120  const size_t maxSamples,
121  const size_t checkInterval,
122  const size_t minSamples,
123  const size_t bins,
124  const size_t observationsBeforeBinning);
125 
// Train in streaming mode on the given dataset.
// NOTE(review): presumably requires that a tree has already been created
// (e.g. via BuildModel()) -- confirm against the implementation.
134  void Train(const arma::mat& dataset,
135  const arma::Row<size_t>& labels,
136  const bool batchTraining);
137 
// Using the model, classify the given test points; the results are written
// to the `predictions` output parameter.
145  void Classify(const arma::mat& dataset,
146  arma::Row<size_t>& predictions) const;
147 
// Overload of Classify() that additionally fills the `probabilities` output
// parameter.
// NOTE(review): presumably one probability per test point, for the predicted
// class -- confirm against the implementation.
156  void Classify(const arma::mat& dataset,
157  arma::Row<size_t>& predictions,
158  arma::rowvec& probabilities) const;
159 
// Get the number of nodes in the tree.
163  size_t NumNodes() const;
164 
// Serialize the model to or from the given archive.
//
// The tree type is archived first; after that, only the single tree pointer
// that corresponds to that type is archived.  The version argument is unused.
168  template<typename Archive>
169  void serialize(Archive& ar, const unsigned int /* version */)
170  {
171  // Clear memory if needed.
// When loading, every tree currently held is deleted and its pointer reset
// to NULL, so that after loading at most the tree selected by `type` is set.
172  if (Archive::is_loading::value)
173  {
174  delete giniHoeffdingTree;
175  delete giniBinaryTree;
176  delete infoHoeffdingTree;
177  delete infoBinaryTree;
178 
179  giniHoeffdingTree = NULL;
180  giniBinaryTree = NULL;
181  infoHoeffdingTree = NULL;
182  infoBinaryTree = NULL;
183  }
184 
// The type must be archived before the tree: when loading, the branches
// below dispatch on the value that was just read.
185  ar & BOOST_SERIALIZATION_NVP(type);
186 
187  // Fake dataset info may be needed to create fake trees.
// (A default-constructed DatasetInfo; it is only passed to the placeholder
// tree constructors in the loading branches below.)
188  data::DatasetInfo info;
// NOTE(review): in each loading branch the archive is handed a pointer that
// already points at a freshly allocated placeholder tree.  Whether that
// placeholder is reused or replaced (and therefore leaked) depends on the
// archive's pointer-loading semantics -- confirm.
// If `type` matches none of the four values, nothing further is archived.
189  if (type == GINI_HOEFFDING)
190  {
191  // Create fake tree to load into if needed.
192  if (Archive::is_loading::value)
193  giniHoeffdingTree = new GiniHoeffdingTreeType(info, 1, 1);
194  ar & BOOST_SERIALIZATION_NVP(giniHoeffdingTree);
195  }
196  else if (type == GINI_BINARY)
197  {
198  // Create fake tree to load into if needed.
199  if (Archive::is_loading::value)
200  giniBinaryTree = new GiniBinaryTreeType(info, 1, 1);
201  ar & BOOST_SERIALIZATION_NVP(giniBinaryTree);
202  }
203  else if (type == INFO_HOEFFDING)
204  {
205  // Create fake tree to load into if needed.
206  if (Archive::is_loading::value)
207  infoHoeffdingTree = new InfoHoeffdingTreeType(info, 1, 1);
208  ar & BOOST_SERIALIZATION_NVP(infoHoeffdingTree);
209  }
210  else if (type == INFO_BINARY)
211  {
212  // Create fake tree to load into if needed.
213  if (Archive::is_loading::value)
214  infoBinaryTree = new InfoBinaryTreeType(info, 1, 1);
215  ar & BOOST_SERIALIZATION_NVP(infoBinaryTree);
216  }
217  }
218 
219  private:
// Which of the four tree kinds this model holds; serialize() dispatches on
// this value to decide which tree pointer to archive.
221  TreeType type;
222 
// Tree archived by serialize() when type == GINI_HOEFFDING.  Deleted by
// serialize() before loading.
225  GiniHoeffdingTreeType* giniHoeffdingTree;
226 
// Tree archived by serialize() when type == GINI_BINARY.  Deleted by
// serialize() before loading.
229  GiniBinaryTreeType* giniBinaryTree;
230 
// Tree archived by serialize() when type == INFO_HOEFFDING.  Deleted by
// serialize() before loading.
233  InfoHoeffdingTreeType* infoHoeffdingTree;
234 
// Tree archived by serialize() when type == INFO_BINARY.  Deleted by
// serialize() before loading.
237  InfoBinaryTreeType* infoBinaryTree;
238 };
239 
240 } // namespace tree
241 } // namespace mlpack
242 
243 #endif
Auxiliary information for a dataset, including mappings to/from strings (or other types) and the data...
~HoeffdingTreeModel()
Clean up the given model.
HoeffdingTreeModel & operator=(const HoeffdingTreeModel &other)
Copy the Hoeffding tree model from the given other model.
HoeffdingTree< GiniImpurity, BinaryDoubleNumericSplit, HoeffdingCategoricalSplit > GiniBinaryTreeType
Convenience typedef for GINI_BINARY tree type.
void Classify(const arma::mat &dataset, arma::Row< size_t > &predictions) const
Using the model, classify the given test points.
The HoeffdingTree object represents all of the necessary information for a Hoeffding-bound-based deci...
.hpp
Definition: add_to_po.hpp:21
HoeffdingTree< InformationGain, HoeffdingDoubleNumericSplit, HoeffdingCategoricalSplit > InfoHoeffdingTreeType
Convenience typedef for INFO_HOEFFDING tree type.
HoeffdingTreeModel(const TreeType &type=GINI_HOEFFDING)
Construct the Hoeffding tree model, but don't initialize any tree.
HoeffdingTree< GiniImpurity, HoeffdingDoubleNumericSplit, HoeffdingCategoricalSplit > GiniHoeffdingTreeType
Convenience typedef for GINI_HOEFFDING tree type.
The standard information gain criterion, used for calculating gain in decision trees.
void serialize(Archive &ar, const unsigned int)
Serialize the model.
void BuildModel(const arma::mat &dataset, const data::DatasetInfo &datasetInfo, const arma::Row< size_t > &labels, const size_t numClasses, const bool batchTraining, const double successProbability, const size_t maxSamples, const size_t checkInterval, const size_t minSamples, const size_t bins, const size_t observationsBeforeBinning)
Train the model on the given dataset with the given labels.
BinaryNumericSplit< FitnessFunction, double > BinaryDoubleNumericSplit
TreeType
This enumerates the four types of trees we can hold.
void Train(const arma::mat &dataset, const arma::Row< size_t > &labels, const bool batchTraining)
Train in streaming mode on the given dataset.
This class is a serializable Hoeffding tree model that can hold four different types of Hoeffding tre...
size_t NumNodes() const
Get the number of nodes in the tree.
This is the standard Hoeffding-bound categorical feature proposed in the paper below: ...
HoeffdingNumericSplit< FitnessFunction, double > HoeffdingDoubleNumericSplit
Convenience typedef.
HoeffdingTree< InformationGain, BinaryDoubleNumericSplit, HoeffdingCategoricalSplit > InfoBinaryTreeType
Convenience typedef for INFO_BINARY tree type.