acrobot.hpp
Go to the documentation of this file.
1 
13 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_ACROBOT_HPP
14 #define MLPACK_METHODS_RL_ENVIRONMENT_ACROBOT_HPP
15 
16 #include <mlpack/core.hpp>
17 
18 namespace mlpack{
19 namespace rl{
20 
28 class Acrobot
29 {
30  public:
31  /*
32  * Implementation of Acrobot State. Each State is a tuple vector
33  * (theta1, thetha2, angular velocity 1, angular velocity 2).
34  */
35  class State
36  {
37  public:
41  State(): data(dimension) { /* nothing to do here */ }
42 
48  State(const arma::colvec& data) : data(data)
49  { /* nothing to do here */ }
50 
52  arma::colvec& Data() { return data; }
53 
55  double Theta1() const { return data[0]; }
57  double& Theta1() { return data[0]; }
58 
60  double Theta2() const { return data[1]; }
62  double& Theta2() { return data[1]; }
63 
65  double AngularVelocity1() const { return data[2]; }
67  double& AngularVelocity1() { return data[2]; }
68 
70  double AngularVelocity2() const { return data[3]; }
72  double& AngularVelocity2() { return data[3]; }
73 
75  const arma::colvec& Encode() const { return data; }
76 
78  static constexpr size_t dimension = 4;
79 
80  private:
82  arma::colvec data;
83  };
84 
85  /*
86  * Implementation of action for Acrobot
87  */
88  enum Action
89  {
93 
94  // Track the size of the action space.
96  };
97 
116  Acrobot(const double gravity = 9.81,
117  const double linkLength1 = 1.0,
118  const double linkLength2 = 1.0,
119  const double linkMass1 = 1.0,
120  const double linkMass2 = 1.0,
121  const double linkCom1 = 0.5,
122  const double linkCom2 = 0.5,
123  const double linkMoi = 1.0,
124  const double maxVel1 = 4 * M_PI,
125  const double maxVel2 = 9 * M_PI,
126  const double dt = 0.2,
127  const double doneReward = 0,
128  const size_t maxSteps = 0) :
129  gravity(gravity),
130  linkLength1(linkLength1),
131  linkLength2(linkLength2),
132  linkMass1(linkMass1),
133  linkMass2(linkMass2),
134  linkCom1(linkCom1),
135  linkCom2(linkCom2),
136  linkMoi(linkMoi),
137  maxVel1(maxVel1),
138  maxVel2(maxVel2),
139  dt(dt),
140  doneReward(doneReward),
141  maxSteps(maxSteps),
142  stepsPerformed(0)
143  { /* Nothing to do here */ }
144 
154  double Sample(const State& state,
155  const Action& action,
156  State& nextState)
157  {
158  // Update the number of steps performed.
159  stepsPerformed++;
160 
161  // Make a vector to estimate nextstate.
162  arma::colvec currentState = {state.Theta1(), state.Theta2(),
163  state.AngularVelocity1(), state.AngularVelocity2()};
164 
165  arma::colvec currentNextState = Rk4(currentState, Torque(action));
166 
167  nextState.Theta1() = Wrap(currentNextState[0], -M_PI, M_PI);
168 
169  nextState.Theta2() = Wrap(currentNextState[1], -M_PI, M_PI);
170 
172  nextState.AngularVelocity1() = std::min(
173  std::max(currentNextState[2], -maxVel1), maxVel1);
174  nextState.AngularVelocity2() = std::min(
175  std::max(currentNextState[3], -maxVel2), maxVel2);
176 
177  // Check if the episode has terminated.
178  bool done = IsTerminal(nextState);
179 
180  // Do not reward the agent if time ran out.
181  if (done && maxSteps != 0 && stepsPerformed >= maxSteps)
182  return 0;
183  else if (done)
184  return doneReward;
185 
186  return -1;
187  };
188 
198  double Sample(const State& state, const Action& action)
199  {
200  State nextState;
201  return Sample(state, action, nextState);
202  }
203 
208  {
209  stepsPerformed = 0;
210  return State((arma::randu<arma::colvec>(4) - 0.5) / 5.0);
211  }
212 
219  bool IsTerminal(const State& state) const
220  {
221  if (maxSteps != 0 && stepsPerformed >= maxSteps)
222  {
223  Log::Info << "Episode terminated due to the maximum number of steps"
224  "being taken.";
225  return true;
226  }
227  else if (-std::cos(state.Theta1()) - std::cos(state.Theta1() +
228  state.Theta2()) > 1.0)
229  {
230  Log::Info << "Episode terminated due to agent succeeding.";
231  return true;
232  }
233  return false;
234  }
235 
243  arma::colvec Dsdt(arma::colvec state, const double torque) const
244  {
245  const double m1 = linkMass1;
246  const double m2 = linkMass2;
247  const double l1 = linkLength1;
248  const double lc1 = linkCom1;
249  const double lc2 = linkCom2;
250  const double I1 = linkMoi;
251  const double I2 = linkMoi;
252  const double g = gravity;
253  const double a = torque;
254  const double theta1 = state[0];
255  const double theta2 = state[1];
256 
257  arma::colvec values(4);
258  values[0] = state[2];
259  values[1] = state[3];
260 
261  const double d1 = m1 * std::pow(lc1, 2) + m2 * (std::pow(l1, 2) +
262  std::pow(lc2, 2) + 2 * l1 * lc2 * std::cos(theta2)) + I1 + I2;
263 
264  const double d2 = m2 * (std::pow(lc2, 2) + l1 * lc2 * std::cos(theta2)) +
265  I2;
266 
267  const double phi2 = m2 * lc2 * g * std::cos(theta1 + theta2 - M_PI / 2.);
268 
269  const double phi1 = - m2 * l1 * lc2 * std::pow(values[1], 2) *
270  std::sin(theta2) - 2 * m2 * l1 * lc2 * values[1] * values[0] *
271  std::sin(theta2) + (m1 * lc1 + m2 * l1) * g *
272  std::cos(theta1 - M_PI / 2) + phi2;
273 
274  values[3] = (a + d2 / d1 * phi1 - m2 * l1 * lc2 * std::pow(values[0], 2) *
275  std::sin(theta2) - phi2) / (m2 * std::pow(lc2, 2) + I2 -
276  std::pow(d2, 2) / d1);
277 
278  values[2] = -(d2 * values[3] + phi1) / d1;
279 
280  return values;
281  };
282 
292  double Wrap(double value,
293  const double minimum,
294  const double maximum) const
295  {
296  const double diff = maximum - minimum;
297 
298  if (value > maximum)
299  {
300  value = value - diff;
301  }
302  else if (value < minimum)
303  {
304  value = value + diff;
305  }
306 
307  return value;
308  };
309 
316  double Torque(const Action& action) const
317  {
318  // Add noise to the Torque Torque is action number - 1. {0,1,2} -> {-1,0,1}.
319  return double(action - 1) + mlpack::math::Random(-0.1, 0.1);
320  }
321 
329  arma::colvec Rk4(const arma::colvec state, const double torque) const
330  {
331  arma::colvec k1 = Dsdt(state, torque);
332  arma::colvec k2 = Dsdt(state + dt * k1 / 2, torque);
333  arma::colvec k3 = Dsdt(state + dt * k2 / 2, torque);
334  arma::colvec k4 = Dsdt(state + dt * k3, torque);
335  arma::colvec nextState = state + dt * (k1 + 2 * k2 + 2 * k3 + k4) / 6;
336 
337  return nextState;
338  };
339 
341  size_t StepsPerformed() const { return stepsPerformed; }
342 
344  size_t MaxSteps() const { return maxSteps; }
346  size_t& MaxSteps() { return maxSteps; }
347 
348  private:
350  double gravity;
351 
353  double linkLength1;
354 
356  double linkLength2;
357 
359  double linkMass1;
360 
362  double linkMass2;
363 
365  double linkCom1;
366 
368  double linkCom2;
369 
371  double linkMoi;
372 
374  double maxVel1;
375 
377  double maxVel2;
378 
380  double dt;
381 
383  double doneReward;
384 
386  size_t maxSteps;
387 
389  size_t stepsPerformed;
390 };
391 
392 } // namespace rl
393 } // namespace mlpack
394 
395 #endif
arma::colvec & Data()
Modify the state representation.
Definition: acrobot.hpp:52
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of the Acrobot System.
Definition: acrobot.hpp:154
bool IsTerminal(const State &state) const
This function checks if the acrobot has reached the terminal state.
Definition: acrobot.hpp:219
.hpp
Definition: add_to_po.hpp:21
double & AngularVelocity1()
Modify the angular velocity (one).
Definition: acrobot.hpp:67
double AngularVelocity1() const
Get value of Angular velocity (one).
Definition: acrobot.hpp:65
size_t MaxSteps() const
Get the maximum number of steps allowed.
Definition: acrobot.hpp:344
const arma::colvec & Encode() const
Encode the state to a column vector.
Definition: acrobot.hpp:75
#define M_PI
Definition: prereqs.hpp:39
size_t StepsPerformed() const
Get the number of steps performed.
Definition: acrobot.hpp:341
double & Theta1()
Modify value of theta (one).
Definition: acrobot.hpp:57
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if –verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84
double Torque(const Action &action) const
This function calculates the torque for a particular action.
Definition: acrobot.hpp:316
double & Theta2()
Modify value of theta (two).
Definition: acrobot.hpp:62
double & AngularVelocity2()
Modify the angular velocity (two).
Definition: acrobot.hpp:72
static constexpr size_t dimension
Dimension of the encoded state.
Definition: acrobot.hpp:78
Implementation of Acrobot game.
Definition: acrobot.hpp:28
size_t & MaxSteps()
Set the maximum number of steps allowed.
Definition: acrobot.hpp:346
arma::colvec Dsdt(arma::colvec state, const double torque) const
This is the ordinary differential equations required for estimation of nextState through RK4 method...
Definition: acrobot.hpp:243
double AngularVelocity2() const
Get value of Angular velocity (two).
Definition: acrobot.hpp:70
double Sample(const State &state, const Action &action)
Dynamics of the Acrobot System.
Definition: acrobot.hpp:198
Include all of the base components required to write mlpack methods, and the main mlpack Doxygen docu...
double Theta2() const
Get value of theta (two).
Definition: acrobot.hpp:60
double Random()
Generates a uniform random number between 0 and 1.
Definition: random.hpp:78
double Theta1() const
Get value of theta (one).
Definition: acrobot.hpp:55
State()
Construct a state instance.
Definition: acrobot.hpp:41
State(const arma::colvec &data)
Construct a state instance from given data.
Definition: acrobot.hpp:48
double Wrap(double value, const double minimum, const double maximum) const
Wrap funtion is required to truncate the angle value from -180 to 180.
Definition: acrobot.hpp:292
State InitialSample()
This function does random initialization of state space.
Definition: acrobot.hpp:207
arma::colvec Rk4(const arma::colvec state, const double torque) const
This function calls the RK4 iterative method to estimate the next state based on given ordinary diffe...
Definition: acrobot.hpp:329
Acrobot(const double gravity=9.81, const double linkLength1=1.0, const double linkLength2=1.0, const double linkMass1=1.0, const double linkMass2=1.0, const double linkCom1=0.5, const double linkCom2=0.5, const double linkMoi=1.0, const double maxVel1=4 *M_PI, const double maxVel2=9 *M_PI, const double dt=0.2, const double doneReward=0, const size_t maxSteps=0)
Construct a Acrobot instance using the given constants.
Definition: acrobot.hpp:116