pendulum.hpp
Go to the documentation of this file.
1 
16 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_PENDULUM_HPP
17 #define MLPACK_METHODS_RL_ENVIRONMENT_PENDULUM_HPP
18 
19 #include <mlpack/prereqs.hpp>
21 
22 namespace mlpack {
23 namespace rl {
24 
31 class Pendulum
32 {
33  public:
38  class State
39  {
40  public:
44  State() : theta(0), data(dimension, arma::fill::zeros)
45  { /* Nothing to do here. */ }
46 
53  State(const arma::colvec& data): theta(0), data(data)
54  { /* Nothing to do here. */ }
55 
57  arma::colvec& Data() { return data; }
58 
60  double Theta() const { return theta; }
62  double& Theta() { return theta; }
63 
65  double AngularVelocity() const { return data[2]; }
67  double& AngularVelocity() { return data[2]; }
68 
70  const arma::colvec& Encode() { return data; }
71 
73  void SetState()
74  {
75  data[0] = std::sin(theta);
76  data[1] = std::cos(theta);
77  }
78 
80  static constexpr size_t dimension = 3;
81 
82  private:
84  double theta;
85 
87  arma::colvec data;
88  };
89 
95  class Action
96  {
97  public:
101  Action() : action(1)
102  { /* Nothing to do here */ }
103  std::vector<double> action;
104  // Storing degree of freedom.
105  static const size_t size = 1;
106  };
107 
118  Pendulum(const size_t maxSteps = 200,
119  const double maxAngularVelocity = 8,
120  const double maxTorque = 2.0,
121  const double dt = 0.05,
122  const double doneReward = 0.0) :
123  maxSteps(maxSteps),
124  maxAngularVelocity(maxAngularVelocity),
125  maxTorque(maxTorque),
126  dt(dt),
127  doneReward(doneReward),
128  stepsPerformed(0)
129  { /* Nothing to do here */ }
130 
140  double Sample(const State& state,
141  const Action& action,
142  State& nextState)
143  {
144  // Update the number of steps performed.
145  stepsPerformed++;
146 
147  // Get current state.
148  double theta = state.Theta();
149  double angularVelocity = state.AngularVelocity();
150 
151  // Define constants which specify our pendulum.
152  const double gravity = 10.0;
153  const double mass = 1.0;
154  const double length = 1.0;
155 
156  // Get action and clip the values between max and min limits.
157  double torque = math::ClampRange(action.action[0], -maxTorque, maxTorque);
158 
159  // Calculate costs of taking this action in the current state.
160  double costs = std::pow(AngleNormalize(theta), 2) + 0.1 *
161  std::pow(angularVelocity, 2) + 0.001 * std::pow(torque, 2);
162 
163  // Calculate new state values and assign to the next state.
164  double newAngularVelocity = angularVelocity + (-3.0 * gravity / (2 *
165  length) * std::sin(theta + M_PI) + 3.0 / (mass * std::pow(length, 2)) *
166  torque) * dt;
167  nextState.Theta() = theta + newAngularVelocity * dt;
168  nextState.AngularVelocity() = math::ClampRange(newAngularVelocity,
169  -maxAngularVelocity, maxAngularVelocity);
170 
171  nextState.SetState();
172 
173  // Return the reward of taking the action in current state.
174  // The reward is simply the negative of cost incurred for the action.
175  return -costs;
176  }
177 
185  double Sample(const State& state, const Action& action)
186  {
187  State nextState;
188  return Sample(state, action, nextState);
189  }
190 
198  {
199  State state;
200  state.Theta() = math::Random(-M_PI, M_PI);
201  state.AngularVelocity() = math::Random(-1.0, 1.0);
202  stepsPerformed = 0;
203  state.SetState();
204  return state;
205  }
206 
212  double AngleNormalize(double theta) const
213  {
214  // Scale angle within [-pi, pi).
215  double x = fmod(theta + M_PI, 2 * M_PI);
216  if (x < 0)
217  x += 2 * M_PI;
218  return x - M_PI;
219  }
220 
227  bool IsTerminal(const State& /* state */) const
228  {
229  if (maxSteps != 0 && stepsPerformed >= maxSteps)
230  {
231  Log::Info << "Episode terminated due to the maximum number of steps"
232  "being taken.";
233  return true;
234  }
235  return false;
236  }
237 
239  size_t StepsPerformed() const { return stepsPerformed; }
240 
242  size_t MaxSteps() const { return maxSteps; }
244  size_t& MaxSteps() { return maxSteps; }
245 
246  private:
248  size_t maxSteps;
249 
251  double maxAngularVelocity;
252 
254  double maxTorque;
255 
257  double dt;
258 
260  double doneReward;
261 
263  size_t stepsPerformed;
264 };
265 
266 } // namespace rl
267 } // namespace mlpack
268 
269 #endif
bool IsTerminal(const State &) const
This function checks if the pendulum has reached a terminal state.
Definition: pendulum.hpp:227
double Theta() const
Get the theta.
Definition: pendulum.hpp:60
double & Theta()
Modify the value of theta.
Definition: pendulum.hpp:62
Implementation of Pendulum task.
Definition: pendulum.hpp:31
constexpr auto size(Container const &container) noexcept -> decltype(container.size())
Definition: iterator.hpp:29
Linear algebra utility functions, generally performed on matrices or vectors.
size_t MaxSteps() const
Get the maximum number of steps allowed.
Definition: pendulum.hpp:242
const arma::colvec & Encode()
Encode the state to a column vector.
Definition: pendulum.hpp:70
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of Pendulum.
Definition: pendulum.hpp:140
The core includes that mlpack expects; standard C++ includes and Armadillo.
arma::colvec & Data()
Modify the internal representation of the state.
Definition: pendulum.hpp:57
State(const arma::colvec &data)
Construct a state based on the given data.
Definition: pendulum.hpp:53
double AngleNormalize(double theta) const
This function calculates the normalized angle for a particular theta.
Definition: pendulum.hpp:212
#define M_PI
Definition: prereqs.hpp:39
double AngularVelocity() const
Get the angular velocity.
Definition: pendulum.hpp:65
Miscellaneous math clamping routines.
auto fill(Range &&rng, T const &value) -> enable_if_t< is_range< Range >::value >
Definition: algorithm.hpp:766
Implementation of action of Pendulum.
Definition: pendulum.hpp:95
State()
Construct a state instance.
Definition: pendulum.hpp:44
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if --verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84
size_t StepsPerformed() const
Get the number of steps performed.
Definition: pendulum.hpp:239
void SetState()
Updates the theta transformations in data.
Definition: pendulum.hpp:73
double & AngularVelocity()
Modify the value of angular velocity.
Definition: pendulum.hpp:67
static constexpr size_t dimension
Dimension of the encoded state.
Definition: pendulum.hpp:80
size_t & MaxSteps()
Set the maximum number of steps allowed.
Definition: pendulum.hpp:244
Implementation of state of Pendulum.
Definition: pendulum.hpp:38
double Sample(const State &state, const Action &action)
Dynamics of Pendulum.
Definition: pendulum.hpp:185
double Random()
Generates a uniform random number between 0 and 1.
Definition: random.hpp:83
State InitialSample()
Initial theta is randomly generated within [-pi, pi].
Definition: pendulum.hpp:197
Action()
Construct an action instance.
Definition: pendulum.hpp:101
Pendulum(const size_t maxSteps=200, const double maxAngularVelocity=8, const double maxTorque=2.0, const double dt=0.05, const double doneReward=0.0)
Construct a Pendulum instance using the given values.
Definition: pendulum.hpp:118
std::vector< double > action
Definition: pendulum.hpp:103
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.
Definition: clamp.hpp:53