continuous_mountain_car.hpp
Go to the documentation of this file.
1 
17 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_CONTINUOUS_MOUNTAIN_CAR_HPP
18 #define MLPACK_METHODS_RL_ENVIRONMENT_CONTINUOUS_MOUNTAIN_CAR_HPP
19 
20 #include <mlpack/prereqs.hpp>
22 
23 namespace mlpack {
24 namespace rl {
25 
30 {
31  public:
36  class State
37  {
38  public:
42  State() : data(dimension, arma::fill::zeros)
43  { /* Nothing to do here. */ }
44 
50  State(const arma::colvec& data): data(data)
51  { /* Nothing to do here. */ }
52 
54  arma::colvec& Data() { return data; }
55 
57  double Velocity() const { return data[0]; }
59  double& Velocity() { return data[0]; }
60 
62  double Position() const { return data[1]; }
64  double& Position() { return data[1]; }
65 
67  const arma::colvec& Encode() const { return data; }
68 
70  static constexpr size_t dimension = 2;
71 
72  private:
74  arma::colvec data;
75  };
76 
85  struct Action
86  {
87  double action[1];
88  // Storing degree of freedom
89  const int size = 1;
90  };
91 
105  ContinuousMountainCar(const double positionMin = -1.2,
106  const double positionMax = 0.6,
107  const double positionGoal = 0.45,
108  const double velocityMin = -0.07,
109  const double velocityMax = 0.07,
110  const double duration = 0.0015,
111  const double doneReward = 100,
112  const size_t maxSteps = 0) :
113  positionMin(positionMin),
114  positionMax(positionMax),
115  positionGoal(positionGoal),
116  velocityMin(velocityMin),
117  velocityMax(velocityMax),
118  duration(duration),
119  doneReward(doneReward),
120  maxSteps(maxSteps),
121  stepsPerformed(0)
122  { /* Nothing to do here */ }
123 
132  double Sample(const State& state,
133  const Action& action,
134  State& nextState)
135  {
136  // Update the number of steps performed.
137  stepsPerformed++;
138 
139  // Calculate acceleration.
140  double force = math::ClampRange(action.action[0], -1.0, 1.0);
141 
142  // Update states.
143  nextState.Velocity() = state.Velocity() + force * duration - 0.0025 *
144  std::cos(3 * state.Position());
145  nextState.Velocity() = math::ClampRange(nextState.Velocity(),
146  velocityMin, velocityMax);
147  nextState.Position() = state.Position() + nextState.Velocity();
148  nextState.Position() = math::ClampRange(nextState.Position(),
149  positionMin, positionMax);
150  if (nextState.Position() == positionMin && nextState.Velocity() < 0)
151  nextState.Velocity() = 0.0;
152 
153  // Check if the episode has terminated.
154  bool done = IsTerminal(nextState);
155 
156  // Do not reward the agent if time ran out.
157  if (done && maxSteps != 0 && stepsPerformed >= maxSteps)
158  return 0;
159  else if (done)
160  return doneReward;
161 
162  return std::pow(action.action[0], 2) * 0.1;
163  }
164 
173  double Sample(const State& state, const Action& action)
174  {
175  State nextState;
176  return Sample(state, action, nextState);
177  }
178 
186  {
187  State state;
188  stepsPerformed = 0;
189  state.Velocity() = 0.0;
190  state.Position() = math::Random(-0.6, -0.4);
191  return state;
192  }
193 
200  bool IsTerminal(const State& state) const
201  {
202  if (maxSteps != 0 && stepsPerformed >= maxSteps)
203  {
204  Log::Info << "Episode terminated due to the maximum number of steps"
205  "being taken.";
206  return true;
207  }
208  else if (state.Position() >= positionGoal)
209  {
210  Log::Info << "Episode terminated due to agent succeeding.";
211  return true;
212  }
213  return false;
214  }
215 
217  size_t StepsPerformed() const { return stepsPerformed; }
218 
220  size_t MaxSteps() const { return maxSteps; }
222  size_t& MaxSteps() { return maxSteps; }
223 
224  private:
226  double positionMin;
227 
229  double positionMax;
230 
232  double positionGoal;
233 
235  double velocityMin;
236 
238  double velocityMax;
239 
241  double duration;
242 
244  double doneReward;
245 
247  size_t maxSteps;
248 
250  size_t stepsPerformed;
251 };
252 
253 } // namespace rl
254 } // namespace mlpack
255 
256 #endif
constexpr auto size(Container const &container) noexcept -> decltype(container.size())
Definition: iterator.hpp:29
Linear algebra utility functions, generally performed on matrices or vectors.
double Sample(const State &state, const Action &action)
Dynamics of Continuous Mountain Car.
The core includes that mlpack expects; standard C++ includes, Armadillo, cereal, and a few basic mlpa...
Implementation of state of Continuous Mountain Car.
const arma::colvec & Encode() const
Encode the state to a column vector.
static constexpr size_t dimension
Dimension of the encoded state.
Miscellaneous math clamping routines.
auto fill(Range &&rng, T const &value) -> enable_if_t< is_range< Range >::value >
Definition: algorithm.hpp:766
State(const arma::colvec &data)
Construct a state based on the given data.
arma::colvec & Data()
Modify the internal representation of the state.
Implementation of action of Continuous Mountain Car.
size_t & MaxSteps()
Set the maximum number of steps allowed.
Implementation of Continuous Mountain Car task.
double Random()
Generates a uniform random number between 0 and 1.
Definition: random.hpp:96
size_t MaxSteps() const
Get the maximum number of steps allowed.
size_t StepsPerformed() const
Get the number of steps performed.
static util::PrefixedOutStream Info
Definition: log.hpp:93
State InitialSample()
Initial position is randomly generated within [-0.6, -0.4].
ContinuousMountainCar(const double positionMin=-1.2, const double positionMax=0.6, const double positionGoal=0.45, const double velocityMin=-0.07, const double velocityMax=0.07, const double duration=0.0015, const double doneReward=100, const size_t maxSteps=0)
Construct a Continuous Mountain Car instance using the given constant.
bool IsTerminal(const State &state) const
Whether given state is a terminal state.
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of Continuous Mountain Car.
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.
Definition: clamp.hpp:53