mountain_car.hpp
Go to the documentation of this file.
1 
16 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_MOUNTAIN_CAR_HPP
17 #define MLPACK_METHODS_RL_ENVIRONMENT_MOUNTAIN_CAR_HPP
18 
19 #include <mlpack/prereqs.hpp>
21 
22 namespace mlpack {
23 namespace rl {
24 
29 {
30  public:
35  class State
36  {
37  public:
41  State(): data(dimension, arma::fill::zeros)
42  { /* Nothing to do here. */ }
43 
49  State(const arma::colvec& data): data(data)
50  { /* Nothing to do here. */ }
51 
53  arma::colvec& Data() { return data; }
54 
56  double Velocity() const { return data[0]; }
58  double& Velocity() { return data[0]; }
59 
61  double Position() const { return data[1]; }
63  double& Position() { return data[1]; }
64 
66  const arma::colvec& Encode() const { return data; }
67 
69  static constexpr size_t dimension = 2;
70 
71  private:
73  arma::colvec data;
74  };
75 
79  class Action
80  {
81  public:
82  enum actions
83  {
87  };
88  // To store the action.
90 
91  // Track the size of the action space.
92  static const size_t size = 3;
93  };
94 
107  MountainCar(const size_t maxSteps = 200,
108  const double positionMin = -1.2,
109  const double positionMax = 0.6,
110  const double positionGoal = 0.5,
111  const double velocityMin = -0.07,
112  const double velocityMax = 0.07,
113  const double doneReward = 0) :
114  maxSteps(maxSteps),
115  positionMin(positionMin),
116  positionMax(positionMax),
117  positionGoal(positionGoal),
118  velocityMin(velocityMin),
119  velocityMax(velocityMax),
120  doneReward(doneReward),
121  stepsPerformed(0)
122  { /* Nothing to do here */ }
123 
133  double Sample(const State& state,
134  const Action& action,
135  State& nextState)
136  {
137  // Update the number of steps performed.
138  stepsPerformed++;
139 
140  // Calculate acceleration.
141  int direction = action.action - 1;
142  nextState.Velocity() = state.Velocity() + 0.001 * direction - 0.0025 *
143  std::cos(3 * state.Position());
144  nextState.Velocity() = math::ClampRange(nextState.Velocity(),
145  velocityMin, velocityMax);
146 
147  // Update states.
148  nextState.Position() = state.Position() + nextState.Velocity();
149  nextState.Position() = math::ClampRange(nextState.Position(),
150  positionMin, positionMax);
151 
152  if (nextState.Position() == positionMin && nextState.Velocity() < 0)
153  nextState.Velocity() = 0.0;
154 
155  // Check if the episode has terminated.
156  bool done = IsTerminal(nextState);
157 
158  // Do not reward the agent if time ran out.
159  if (done && maxSteps != 0 && stepsPerformed >= maxSteps)
160  return 0;
161  else if (done)
162  return doneReward;
163 
164  return -1;
165  }
166 
175  double Sample(const State& state, const Action& action)
176  {
177  State nextState;
178  return Sample(state, action, nextState);
179  }
180 
188  {
189  State state;
190  stepsPerformed = 0;
191  state.Velocity() = 0.0;
192  state.Position() = arma::as_scalar(arma::randu(1)) * 0.2 - 0.6;
193  return state;
194  }
195 
202  bool IsTerminal(const State& state) const
203  {
204  if (maxSteps != 0 && stepsPerformed >= maxSteps)
205  {
206  Log::Info << "Episode terminated due to the maximum number of steps"
207  "being taken.";
208  return true;
209  }
210  else if (state.Position() >= positionGoal)
211  {
212  Log::Info << "Episode terminated due to agent succeeding.";
213  return true;
214  }
215  return false;
216  }
217 
219  size_t StepsPerformed() const { return stepsPerformed; }
220 
222  size_t MaxSteps() const { return maxSteps; }
224  size_t& MaxSteps() { return maxSteps; }
225 
226  private:
228  size_t maxSteps;
229 
231  double positionMin;
232 
234  double positionMax;
235 
237  double positionGoal;
238 
240  double velocityMin;
241 
243  double velocityMax;
244 
246  double doneReward;
247 
249  size_t stepsPerformed;
250 };
251 
252 } // namespace rl
253 } // namespace mlpack
254 
255 #endif
Implementation of action of Mountain Car.
constexpr auto size(Container const &container) noexcept -> decltype(container.size())
Definition: iterator.hpp:29
bool IsTerminal(const State &state) const
This function checks if the car has reached the terminal state.
size_t MaxSteps() const
Get the maximum number of steps allowed.
Linear algebra utility functions, generally performed on matrices or vectors.
State InitialSample()
Initial position is randomly generated within [-0.6, -0.4].
constexpr T && forward(remove_reference_t< T > &t) noexcept
Definition: utility.hpp:27
The core includes that mlpack expects; standard C++ includes, Armadillo, cereal, and a few basic mlpa...
const arma::colvec & Encode() const
Encode the state to a column vector.
double & Position()
Modify the position.
double & Velocity()
Modify the velocity.
Miscellaneous math clamping routines.
auto fill(Range &&rng, T const &value) -> enable_if_t< is_range< Range >::value >
Definition: algorithm.hpp:766
double Sample(const State &state, const Action &action)
Dynamics of Mountain Car.
State(const arma::colvec &data)
Construct a state based on the given data.
arma::colvec & Data()
Modify the internal representation of the state.
MountainCar(const size_t maxSteps=200, const double positionMin=-1.2, const double positionMax=0.6, const double positionGoal=0.5, const double velocityMin=-0.07, const double velocityMax=0.07, const double doneReward=0)
Construct a Mountain Car instance using the given constant.
Implementation of state of Mountain Car.
State()
Construct a state instance.
Implementation of Mountain Car task.
size_t StepsPerformed() const
Get the number of steps performed.
static util::PrefixedOutStream Info
Definition: log.hpp:94
size_t & MaxSteps()
Set the maximum number of steps allowed.
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of Mountain Car.
double Position() const
Get the position.
static constexpr size_t dimension
Dimension of the encoded state.
double Velocity() const
Get the velocity.
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.
Definition: clamp.hpp:53