mountain_car.hpp
Go to the documentation of this file.
1 
16 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_MOUNTAIN_CAR_HPP
17 #define MLPACK_METHODS_RL_ENVIRONMENT_MOUNTAIN_CAR_HPP
18 
19 #include <mlpack/prereqs.hpp>
20 
21 namespace mlpack {
22 namespace rl {
23 
28 {
29  public:
34  class State
35  {
36  public:
40  State(): data(dimension, arma::fill::zeros)
41  { /* Nothing to do here. */ }
42 
48  State(const arma::colvec& data): data(data)
49  { /* Nothing to do here. */ }
50 
52  arma::colvec& Data() { return data; }
53 
55  double Velocity() const { return data[0]; }
57  double& Velocity() { return data[0]; }
58 
60  double Position() const { return data[1]; }
62  double& Position() { return data[1]; }
63 
65  const arma::colvec& Encode() const { return data; }
66 
68  static constexpr size_t dimension = 2;
69 
70  private:
72  arma::colvec data;
73  };
74 
78  enum Action
79  {
83 
86  };
87 
100  MountainCar(const double positionMin = -1.2,
101  const double positionMax = 0.6,
102  const double positionGoal = 0.5,
103  const double velocityMin = -0.07,
104  const double velocityMax = 0.07,
105  const double doneReward = 0,
106  const size_t maxSteps = 0) :
107  positionMin(positionMin),
108  positionMax(positionMax),
109  positionGoal(positionGoal),
110  velocityMin(velocityMin),
111  velocityMax(velocityMax),
112  doneReward(doneReward),
113  maxSteps(maxSteps),
114  stepsPerformed(0)
115  { /* Nothing to do here */ }
116 
126  double Sample(const State& state,
127  const Action& action,
128  State& nextState)
129  {
130  // Update the number of steps performed.
131  stepsPerformed++;
132 
133  // Calculate acceleration.
134  int direction = action - 1;
135  nextState.Velocity() = state.Velocity() + 0.001 * direction - 0.0025 *
136  std::cos(3 * state.Position());
137  nextState.Velocity() = std::min(
138  std::max(nextState.Velocity(), velocityMin), velocityMax);
139 
140  // Update states.
141  nextState.Position() = state.Position() + nextState.Velocity();
142  nextState.Position() = std::min(
143  std::max(nextState.Position(), positionMin), positionMax);
144 
145  if (nextState.Position() == positionMin && nextState.Velocity() < 0)
146  nextState.Velocity() = 0.0;
147 
148  // Check if the episode has terminated.
149  bool done = IsTerminal(nextState);
150 
151  // Do not reward the agent if time ran out.
152  if (done && maxSteps != 0 && stepsPerformed >= maxSteps)
153  return 0;
154  else if (done)
155  return doneReward;
156 
157  return -1;
158  }
159 
168  double Sample(const State& state, const Action& action)
169  {
170  State nextState;
171  return Sample(state, action, nextState);
172  }
173 
181  {
182  State state;
183  stepsPerformed = 0;
184  state.Velocity() = 0.0;
185  state.Position() = arma::as_scalar(arma::randu(1)) * 0.2 - 0.6;
186  return state;
187  }
188 
195  bool IsTerminal(const State& state) const
196  {
197  if (maxSteps != 0 && stepsPerformed >= maxSteps)
198  {
199  Log::Info << "Episode terminated due to the maximum number of steps"
200  "being taken.";
201  return true;
202  }
203  else if (state.Position() >= positionGoal)
204  {
205  Log::Info << "Episode terminated due to agent succeeding.";
206  return true;
207  }
208  return false;
209  }
210 
212  size_t StepsPerformed() const { return stepsPerformed; }
213 
215  size_t MaxSteps() const { return maxSteps; }
217  size_t& MaxSteps() { return maxSteps; }
218 
219  private:
221  double positionMin;
222 
224  double positionMax;
225 
227  double positionGoal;
228 
230  double velocityMin;
231 
233  double velocityMax;
234 
236  double doneReward;
237 
239  size_t maxSteps;
240 
242  size_t stepsPerformed;
243 };
244 
245 } // namespace rl
246 } // namespace mlpack
247 
248 #endif
bool IsTerminal(const State &state) const
This function checks if the car has reached the terminal state.
size_t MaxSteps() const
Get the maximum number of steps allowed.
.hpp
Definition: add_to_po.hpp:21
State InitialSample()
Initial position is randomly generated within [-0.6, -0.4].
The core includes that mlpack expects; standard C++ includes and Armadillo.
const arma::colvec & Encode() const
Encode the state to a column vector.
double & Position()
Modify the position.
double & Velocity()
Modify the velocity.
MountainCar(const double positionMin=-1.2, const double positionMax=0.6, const double positionGoal=0.5, const double velocityMin=-0.07, const double velocityMax=0.07, const double doneReward=0, const size_t maxSteps=0)
Construct a Mountain Car instance using the given constant.
double Sample(const State &state, const Action &action)
Dynamics of Mountain Car.
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if –verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84
State(const arma::colvec &data)
Construct a state based on the given data.
Track the size of the action space.
Action
Implementation of action of Mountain Car.
arma::colvec & Data()
Modify the internal representation of the state.
Implementation of state of Mountain Car.
State()
Construct a state instance.
Implementation of Mountain Car task.
size_t StepsPerformed() const
Get the number of steps performed.
size_t & MaxSteps()
Set the maximum number of steps allowed.
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of Mountain Car.
double Position() const
Get the position.
static constexpr size_t dimension
Dimension of the encoded state.
double Velocity() const
Get the velocity.