pendulum.hpp
Go to the documentation of this file.
1 
16 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_PENDULUM_HPP
17 #define MLPACK_METHODS_RL_ENVIRONMENT_PENDULUM_HPP
18 
19 #include <mlpack/prereqs.hpp>
20 
21 namespace mlpack {
22 namespace rl {
23 
30 class Pendulum
31 {
32  public:
37  class State
38  {
39  public:
43  State() : data(dimension, arma::fill::zeros)
44  { /* Nothing to do here. */ }
45 
51  State(const arma::colvec& data): data(data)
52  { /* Nothing to do here. */ }
53 
55  arma::colvec& Data() { return data; }
56 
58  double Theta() const { return data[0]; }
60  double& Theta() { return data[0]; }
61 
63  double AngularVelocity() const { return data[1]; }
65  double& AngularVelocity() { return data[1]; }
66 
68  const arma::colvec& Encode() const { return data; }
69 
71  static constexpr size_t dimension = 2;
72 
73  private:
75  arma::colvec data;
76  };
77 
83  struct Action
84  {
85  double action[1];
86  // Storing degree of freedom
87  const int size = 1;
88  };
89 
102  Pendulum(const double maxAngularVelocity = 8,
103  const double maxTorque = 2.0,
104  const double dt = 0.05,
105  const double angleThreshold = M_PI / 12,
106  const double doneReward = 0.0,
107  const size_t maxSteps = 0) :
108  maxAngularVelocity(maxAngularVelocity),
109  maxTorque(maxTorque),
110  dt(dt),
111  angleThreshold(angleThreshold),
112  doneReward(doneReward),
113  maxSteps(maxSteps),
114  stepsPerformed(0)
115  { /* Nothing to do here */ }
116 
126  double Sample(const State& state,
127  const Action& action,
128  State& nextState)
129  {
130  // Update the number of steps performed.
131  stepsPerformed++;
132 
133  // Get current state.
134  double theta = state.Theta();
135  double angularVelocity = state.AngularVelocity();
136 
137  // Define constants which specify our pendulum.
138  const double gravity = 10.0;
139  const double mass = 1.0;
140  const double length = 1.0;
141 
142  // Get action and clip the values between max and min limits.
143  double torque = std::min(
144  std::max(action.action[0], -maxTorque), maxTorque);
145 
146  // Calculate costs of taking this action in the current state.
147  double costs = std::pow(AngleNormalize(theta), 2) + 0.1 *
148  std::pow(angularVelocity, 2) + 0.001 * std::pow(torque, 2);
149 
150  // Calculate new state values and assign to the next state.
151  double newAngularVelocity = angularVelocity + (-3.0 * gravity / (2 *
152  length) * std::sin(theta + M_PI) + 3.0 / std::pow(mass * length, 2) *
153  torque) * dt;
154  nextState.AngularVelocity() = std::min(std::max(newAngularVelocity,
155  -maxAngularVelocity), maxAngularVelocity);
156  nextState.Theta() = theta + newAngularVelocity * dt;
157 
158  // Check if the episode has terminated
159  bool done = IsTerminal(nextState);
160 
161  // Do not reward the agent if time ran out.
162  if (done && maxSteps != 0 && stepsPerformed >= maxSteps)
163  return 0;
164  else if (done)
165  return doneReward;
166 
167  // Return the reward of taking the action in current state.
168  // The reward is simply the negative of cost incurred for the action.
169  return -costs;
170  }
171 
179  double Sample(const State& state, const Action& action)
180  {
181  State nextState;
182  return Sample(state, action, nextState);
183  }
184 
192  {
193  State state;
194  state.Theta() = math::Random(-M_PI + angleThreshold, M_PI - angleThreshold);
195  state.AngularVelocity() = math::Random(-1.0, 1.0);
196  stepsPerformed = 0;
197  return state;
198  }
199 
205  double AngleNormalize(double theta) const
206  {
207  // Scale angle within [-pi, pi).
208  return double(fmod(theta + M_PI, 2 * M_PI) - M_PI);
209  }
210 
217  bool IsTerminal(const State& state) const
218  {
219  if (maxSteps != 0 && stepsPerformed >= maxSteps)
220  {
221  Log::Info << "Episode terminated due to the maximum number of steps"
222  "being taken.";
223  return true;
224  }
225  else if (state.Theta() > M_PI - angleThreshold ||
226  state.Theta() < -M_PI + angleThreshold)
227  {
228  Log::Info << "Episode terminated due to agent succeeding.";
229  return true;
230  }
231  return false;
232  }
233 
235  size_t StepsPerformed() const { return stepsPerformed; }
236 
238  size_t MaxSteps() const { return maxSteps; }
240  size_t& MaxSteps() { return maxSteps; }
241 
242  private:
244  double maxAngularVelocity;
245 
247  double maxTorque;
248 
250  double dt;
251 
253  double angleThreshold;
254 
256  double doneReward;
257 
259  size_t maxSteps;
260 
262  size_t stepsPerformed;
263 };
264 
265 } // namespace rl
266 } // namespace mlpack
267 
268 #endif
double Theta() const
Get the theta.
Definition: pendulum.hpp:58
double & Theta()
Modify the value of theta.
Definition: pendulum.hpp:60
Implementation of Pendulum task.
Definition: pendulum.hpp:30
.hpp
Definition: add_to_po.hpp:21
size_t MaxSteps() const
Get the maximum number of steps allowed.
Definition: pendulum.hpp:238
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of Pendulum.
Definition: pendulum.hpp:126
The core includes that mlpack expects; standard C++ includes and Armadillo.
Implementation of action of Pendulum.
Definition: pendulum.hpp:83
arma::colvec & Data()
Modify the internal representation of the state.
Definition: pendulum.hpp:55
State(const arma::colvec &data)
Construct a state based on the given data.
Definition: pendulum.hpp:51
double AngleNormalize(double theta) const
This function calculates the normalized angle for a particular theta.
Definition: pendulum.hpp:205
#define M_PI
Definition: prereqs.hpp:39
Pendulum(const double maxAngularVelocity=8, const double maxTorque=2.0, const double dt=0.05, const double angleThreshold=M_PI/12, const double doneReward=0.0, const size_t maxSteps=0)
Construct a Pendulum instance using the given values.
Definition: pendulum.hpp:102
bool IsTerminal(const State &state) const
This function checks if the pendulum has reached a terminal state.
Definition: pendulum.hpp:217
double AngularVelocity() const
Get the angular velocity.
Definition: pendulum.hpp:63
State()
Construct a state instance.
Definition: pendulum.hpp:43
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if --verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84
size_t StepsPerformed() const
Get the number of steps performed.
Definition: pendulum.hpp:235
double & AngularVelocity()
Modify the value of angular velocity.
Definition: pendulum.hpp:65
static constexpr size_t dimension
Dimension of the encoded state.
Definition: pendulum.hpp:71
size_t & MaxSteps()
Set the maximum number of steps allowed.
Definition: pendulum.hpp:240
Implementation of state of Pendulum.
Definition: pendulum.hpp:37
double Sample(const State &state, const Action &action)
Dynamics of Pendulum.
Definition: pendulum.hpp:179
double Random()
Generates a uniform random number between 0 and 1.
Definition: random.hpp:78
State InitialSample()
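Initial theta is randomly generated within [-pi + angleThreshold, pi - angleThreshold]; the initial angular velocity is randomly generated within [-1, 1].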
Definition: pendulum.hpp:191
const arma::colvec & Encode() const
Encode the state to a column vector.
Definition: pendulum.hpp:68