12 #ifndef MLPACK_CORE_DATA_LOAD_CSV_HPP 13 #define MLPACK_CORE_DATA_LOAD_CSV_HPP 15 #include <boost/spirit/include/qi.hpp> 16 #include <boost/algorithm/string/trim.hpp> 43 LoadCSV(
const std::string& file);
54 template<
typename T,
typename PolicyType>
55 void Load(arma::Mat<T> &inout,
57 const bool transpose =
true)
62 TransposeParse(inout, infoSet);
64 NonTransposeParse(inout, infoSet);
77 template<
typename T,
typename MapPolicy>
80 using namespace boost::spirit;
89 inFile.seekg(0, std::ios::beg);
95 while (std::getline(inFile, line))
103 inFile.seekg(0, std::ios::beg);
106 while (std::getline(inFile, line))
115 auto findColSize = [&cols](iter_type) { ++cols; };
116 qi::parse(line.begin(), line.end(),
117 stringRule[findColSize] % delimiterRule);
122 if (MapPolicy::NeedsFirstPass)
125 auto firstPassMap = [&](
const iter_type& iter)
127 std::string str(iter.begin(), iter.end());
130 info.template MapFirstPass<T>(std::move(str), rows - 1);
134 qi::parse(line.begin(), line.end(),
135 stringRule[firstPassMap] % delimiterRule);
150 template<
typename T,
typename MapPolicy>
155 using namespace boost::spirit;
164 inFile.seekg(0, std::ios::beg);
169 while (std::getline(inFile, line))
178 auto findRowSize = [&rows](iter_type) { ++rows; };
179 qi::parse(line.begin(), line.end(),
180 stringRule[findRowSize] % delimiterRule);
187 if (MapPolicy::NeedsFirstPass)
192 auto firstPassMap = [&](
const iter_type& iter)
194 std::string str(iter.begin(), iter.end());
197 info.template MapFirstPass<T>(std::move(str), dim++);
201 qi::parse(line.begin(), line.end(),
202 stringRule[firstPassMap] % delimiterRule);
208 using iter_type = boost::iterator_range<std::string::iterator>;
222 template<
typename T,
typename PolicyType>
223 void NonTransposeParse(arma::Mat<T>& inout,
226 using namespace boost::spirit;
230 GetMatrixSize<T>(rows, cols, infoSet);
233 inout.set_size(rows, cols);
240 inFile.seekg(0, std::ios::beg);
242 auto setCharClass = [&](iter_type
const &iter)
244 std::string str(iter.begin(), iter.end());
251 inout(row, col++) = infoSet.template MapString<T>(std::move(str), row);
254 while (std::getline(inFile, line))
261 const bool canParse = qi::parse(line.begin(), line.end(),
262 stringRule[setCharClass] % delimiterRule);
267 std::ostringstream oss;
268 oss <<
"LoadCSV::NonTransposeParse(): wrong number of dimensions (" 269 << col <<
") on line " << row <<
"; should be " << cols
271 throw std::runtime_error(oss.str());
276 std::ostringstream oss;
277 oss <<
"LoadCSV::NonTransposeParse(): parsing error on line " << col
279 throw std::runtime_error(oss.str());
292 template<
typename T,
typename PolicyType>
295 using namespace boost::spirit;
299 GetTransposeMatrixSize<T>(rows, cols, infoSet);
302 inout.set_size(rows, cols);
309 inFile.seekg(0, std::ios::beg);
315 auto parseString = [&](iter_type
const &iter)
318 std::string str(iter.begin(), iter.end());
321 inout(row, col) = infoSet.template MapString<T>(std::move(str), row);
325 while (std::getline(inFile, line))
335 const bool canParse = qi::parse(line.begin(), line.end(),
336 stringRule[parseString] % delimiterRule);
341 std::ostringstream oss;
342 oss <<
"LoadCSV::TransposeParse(): wrong number of dimensions (" << row
343 <<
") on line " << col <<
"; should be " << rows <<
" dimensions.";
344 throw std::runtime_error(oss.str());
349 std::ostringstream oss;
350 oss <<
"LoadCSV::TransposeParse(): parsing error on line " << col
352 throw std::runtime_error(oss.str());
361 boost::spirit::qi::rule<std::string::iterator, iter_type()> stringRule;
363 boost::spirit::qi::rule<std::string::iterator, iter_type()> delimiterRule;
366 std::string extension;
368 std::string filename;
370 std::ifstream inFile;
Auxiliary information for a dataset, including mappings to/from strings (or other types) and the data...
Load the CSV file. This class uses boost::spirit to implement the parser; please refer to the following lin...
void Load(arma::Mat< T > &inout, DatasetMapper< PolicyType > &infoSet, const bool transpose=true)
Load the file into the given matrix with the given DatasetMapper object.
Linear algebra utility functions, generally performed on matrices or vectors.
void GetTransposeMatrixSize(size_t &rows, size_t &cols, DatasetMapper< MapPolicy > &info)
Peek at the file to determine the number of rows and columns in the matrix, assuming a transposed mat...
LoadCSV(const std::string &file)
Construct the LoadCSV object on the given file.
void GetMatrixSize(size_t &rows, size_t &cols, DatasetMapper< MapPolicy > &info)
Peek at the file to determine the number of rows and columns in the matrix, assuming a non-transposed...
Include all of the base components required to write mlpack methods, and the main mlpack Doxygen docu...
void SetDimensionality(const size_t dimensionality)
Set the dimensionality of an existing DatasetMapper object.