continuous_mountain_car.hpp
Go to the documentation of this file.
1 
17 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_CONTINUOUS_MOUNTAIN_CAR_HPP
18 #define MLPACK_METHODS_RL_ENVIRONMENT_CONTINUOUS_MOUNTAIN_CAR_HPP
19 
20 #include <mlpack/prereqs.hpp>
21 
22 namespace mlpack {
23 namespace rl {
24 
29 {
30  public:
35  class State
36  {
37  public:
41  State() : data(dimension, arma::fill::zeros)
42  { /* Nothing to do here. */ }
43 
49  State(const arma::colvec& data): data(data)
50  { /* Nothing to do here. */ }
51 
53  arma::colvec& Data() { return data; }
54 
56  double Velocity() const { return data[0]; }
58  double& Velocity() { return data[0]; }
59 
61  double Position() const { return data[1]; }
63  double& Position() { return data[1]; }
64 
66  const arma::colvec& Encode() const { return data; }
67 
69  static constexpr size_t dimension = 2;
70 
71  private:
73  arma::colvec data;
74  };
75 
84  struct Action
85  {
86  double action[1];
87  // Storing degree of freedom
88  const int size = 1;
89  };
90 
104  ContinuousMountainCar(const double positionMin = -1.2,
105  const double positionMax = 0.6,
106  const double positionGoal = 0.45,
107  const double velocityMin = -0.07,
108  const double velocityMax = 0.07,
109  const double power = 0.0015,
110  const double doneReward = 100,
111  const size_t maxSteps = 0) :
112  positionMin(positionMin),
113  positionMax(positionMax),
114  positionGoal(positionGoal),
115  velocityMin(velocityMin),
116  velocityMax(velocityMax),
117  power(power),
118  doneReward(doneReward),
119  maxSteps(maxSteps),
120  stepsPerformed(0)
121  { /* Nothing to do here */ }
122 
131  double Sample(const State& state,
132  const Action& action,
133  State& nextState)
134  {
135  // Update the number of steps performed.
136  stepsPerformed++;
137 
138  // Calculate acceleration.
139  double force = std::min(std::max(action.action[0], -1.0), 1.0);
140 
141  // Update states.
142  nextState.Velocity() = state.Velocity() + force * power - 0.0025 *
143  std::cos(3 * state.Position());
144  nextState.Velocity() = std::min(
145  std::max(nextState.Velocity(), velocityMin), velocityMax);
146  nextState.Position() = state.Position() + nextState.Velocity();
147  nextState.Position() = std::min(
148  std::max(nextState.Position(), positionMin), positionMax);
149  if (nextState.Position() == positionMin && nextState.Velocity() < 0)
150  nextState.Velocity() = 0.0;
151 
152  // Check if the episode has terminated.
153  bool done = IsTerminal(nextState);
154 
155  // Do not reward the agent if time ran out.
156  if (done && maxSteps != 0 && stepsPerformed >= maxSteps)
157  return 0;
158  else if (done)
159  return doneReward;
160 
161  return std::pow(action.action[0], 2) * 0.1;
162  }
163 
172  double Sample(const State& state, const Action& action)
173  {
174  State nextState;
175  return Sample(state, action, nextState);
176  }
177 
185  {
186  State state;
187  stepsPerformed = 0;
188  state.Velocity() = 0.0;
189  state.Position() = math::Random(-0.6, -0.4);
190  return state;
191  }
192 
199  bool IsTerminal(const State& state) const
200  {
201  if (maxSteps != 0 && stepsPerformed >= maxSteps)
202  {
203  Log::Info << "Episode terminated due to the maximum number of steps"
204  "being taken.";
205  return true;
206  }
207  else if (state.Position() >= positionGoal)
208  {
209  Log::Info << "Episode terminated due to agent succeeding.";
210  return true;
211  }
212  return false;
213  }
214 
216  size_t StepsPerformed() const { return stepsPerformed; }
217 
219  size_t MaxSteps() const { return maxSteps; }
221  size_t& MaxSteps() { return maxSteps; }
222 
223  private:
225  double positionMin;
226 
228  double positionMax;
229 
231  double positionGoal;
232 
234  double velocityMin;
235 
237  double velocityMax;
238 
240  double power;
241 
243  double doneReward;
244 
246  size_t maxSteps;
247 
249  size_t stepsPerformed;
250 };
251 
252 } // namespace rl
253 } // namespace mlpack
254 
255 #endif
ContinuousMountainCar(const double positionMin=-1.2, const double positionMax=0.6, const double positionGoal=0.45, const double velocityMin=-0.07, const double velocityMax=0.07, const double power=0.0015, const double doneReward=100, const size_t maxSteps=0)
Construct a Continuous Mountain Car instance using the given constant.
.hpp
Definition: add_to_po.hpp:21
double Sample(const State &state, const Action &action)
Dynamics of Continuous Mountain Car.
The core includes that mlpack expects; standard C++ includes and Armadillo.
Implementation of state of Continuous Mountain Car.
const arma::colvec & Encode() const
Encode the state to a column vector.
static constexpr size_t dimension
Dimension of the encoded state.
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if –verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84
State(const arma::colvec &data)
Construct a state based on the given data.
arma::colvec & Data()
Modify the internal representation of the state.
Implementation of action of Continuous Mountain Car.
size_t & MaxSteps()
Set the maximum number of steps allowed.
Implementation of Continuous Mountain Car task.
double Random()
Generates a uniform random number between 0 and 1.
Definition: random.hpp:78
size_t MaxSteps() const
Get the maximum number of steps allowed.
size_t StepsPerformed() const
Get the number of steps performed.
State InitialSample()
Initial position is randomly generated within [-0.6, -0.4].
bool IsTerminal(const State &state) const
Whether given state is a terminal state.
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of Continuous Mountain Car.