load_numeric_csv.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_CORE_DATA_LOAD_NUMERIC_CSV_HPP
13 #define MLPACK_CORE_DATA_LOAD_NUMERIC_CSV_HPP
14 
15 #include "load_csv.hpp"
16 
17 namespace mlpack{
18 namespace data{
19 
20 template<typename eT>
21 bool LoadCSV::ConvertToken(eT& val,
22  const std::string& token)
23 {
24  const size_t N = size_t(token.length());
25  // Fill empty data points with 0.
26  if (N == 0)
27  {
28  val = eT(0);
29  return true;
30  }
31 
32  const char* str = token.c_str();
33 
34  // Checks for +/-INF and NAN
35  // Converts them to their equivalent representation
36  // from numeric_limits.
37  if ((N == 3) || (N == 4))
38  {
39  const bool neg = (str[0] == '-');
40  const bool pos = (str[0] == '+');
41 
42  const size_t offset = ((neg || pos) && (N == 4)) ? 1 : 0;
43 
44  const char sigA = str[offset];
45  const char sigB = str[offset + 1];
46  const char sigC = str[offset + 2];
47 
48  if (((sigA == 'i') || (sigA == 'I')) &&
49  ((sigB == 'n') || (sigB == 'N')) &&
50  ((sigC == 'f') || (sigC == 'F')))
51  {
52  val = neg ? -(std::numeric_limits<eT>
53  ::infinity()) : std::numeric_limits<eT>::infinity();
54  return true;
55  }
56  else if (((sigA == 'n') || (sigA == 'N')) &&
57  ((sigB == 'a') || (sigB == 'A')) &&
58  ((sigC == 'n') || (sigC == 'N')))
59  {
60  val = std::numeric_limits<eT>::quiet_NaN();
61  return true;
62  }
63  }
64 
65  char* endptr = nullptr;
66 
67  // Convert the token into correct type.
68  // If we have a eT as unsigned int,
69  // it will convert all negative numbers to 0.
70  if (std::is_floating_point<eT>::value)
71  {
72  val = eT(std::strtod(str, &endptr));
73  }
74  else if (std::is_integral<eT>::value)
75  {
76  if (std::is_signed<eT>::value)
77  val = eT(std::strtoll(str, &endptr, 10));
78  else
79  {
80  if (str[0] == '-')
81  {
82  val = eT(0);
83  return true;
84  }
85  val = eT(std::strtoull(str, &endptr, 10));
86  }
87  }
88  // If none of the above conditions was executed,
89  // then the conversion will fail.
90  else
91  return false;
92 
93  // If any of strtod() or strtoll() fails, str will
94  // be set to nullptr and this condition will be
95  // executed.
96  if (str == endptr)
97  return false;
98 
99  return true;
100 }
101 
102 template<typename eT>
103 bool LoadCSV::LoadNumericCSV(arma::Mat<eT>& x, std::fstream& f)
104 {
105  bool loadOkay = f.good();
106  f.clear();
107  std::pair<size_t, size_t> mat_size = GetMatrixSize<true>(f);
108  x.zeros(mat_size.first, mat_size.second);
109  size_t row = 0;
110 
111  std::string lineString;
112  std::stringstream lineStream;
113  std::string token;
114 
115  while (f.good())
116  {
117  // Parse the file line by line.
118  std::getline(f, lineString);
119 
120  if (lineString.size() == 0)
121  break;
122 
123  lineStream.clear();
124  lineStream.str(lineString);
125 
126  size_t col = 0;
127 
128  while (lineStream.good())
129  {
130  // Parse each line.
131  std::getline(lineStream, token, ',');
132 
133  // This will handle loading of both dense and sparse.
134  // Initialize tmp_val of type eT with value 0.
135  eT tmpVal = eT(0);
136 
137  if (ConvertToken<eT>(tmpVal, token))
138  {
139  x.at(row, col) = tmpVal;
140  ++col;
141  }
142  else
143  {
144  // Printing failed token and it's location.
145  Log::Warn << "Failed to convert token " << token << ", at row " << row
146  << ", column " << col << " of matrix!";
147 
148  return false;
149  }
150  }
151  ++row;
152  }
153  return loadOkay;
154 }
155 
156 inline void LoadCSV::NumericMatSize(std::stringstream& lineStream,
157  size_t& col,
158  const char delim)
159 {
160  std::string token;
161  while (lineStream.good())
162  {
163  std::getline(lineStream, token, delim);
164  ++col;
165  }
166 }
167 
168 } // namespace data
169 } // namespace mlpack
170 
171 #endif
static util::PrefixedOutStream Warn
Definition: log.hpp:99
Linear algebra utility functions, generally performed on matrices or vectors.
bool ConvertToken(eT &val, const std::string &token)
Converts the given string token to assigned datatype and assigns this value to the given address...
bool LoadNumericCSV(arma::Mat< eT > &x, std::fstream &f)
Returns a bool value showing whether data was loaded successfully or not.
constexpr auto data(Container const &container) noexcept -> decltype(container.data())
Definition: iterator.hpp:79
void NumericMatSize(std::stringstream &lineStream, size_t &col, const char delim)
Calculate the number of columns in each row and assign the value to the col.