Source code for arch_gym.envs.simpleEnv

import gym
from gym.utils import seeding
import numpy as np


class simpleEnv(gym.Env):
    """Minimal one-dimensional "walk to the goal" environment.

    The agent occupies an integer position in ``[LF_MIN, RT_MAX]`` and on
    every step tries to move one cell left (``move_left``) or right
    (``move_right``).  The goal is the midpoint of the track.

    An episode ends when the agent lands on the goal, or when ``step`` is
    called after ``max_steps`` moves have already been made.

    Rewards assigned by a move:
        * ``reward_goal`` -- the agent is on the goal after the move.
        * ``reward_step`` -- the agent moved toward the goal without
          reaching it.
        * ``reward_away`` -- the agent ended up past the goal in its
          direction of travel (this includes bumping into a wall).
    """

    # Track boundaries (both inclusive).
    LF_MIN = 1
    RT_MAX = 10

    # Action encoding.
    move_left = 0
    move_right = 1

    # Number of moves allowed per episode.
    max_steps = 10

    # Reward values.
    reward_away = -2
    reward_step = -1
    reward_goal = 10

    metadata = {
        "render.modes": ["human"]
    }

    def __init__(self):
        """Create the spaces, compute the goal, seed the RNG and reset."""
        self.action_space = gym.spaces.Discrete(2)
        # Discrete(n) covers 0..n-1, so RT_MAX + 1 makes RT_MAX a valid state.
        self.observation_space = gym.spaces.Discrete(self.RT_MAX + 1)

        self.goal = int((self.LF_MIN + self.RT_MAX) / 2)

        # Start positions: every cell in [LF_MIN, RT_MAX) except the goal.
        self.init_positions = list(range(self.LF_MIN, self.RT_MAX))
        self.init_positions.remove(self.goal)

        self.seed()
        self.reset()

    def reset(self):
        """Start a new episode and return the initial state.

        The start position is drawn from ``init_positions`` using the
        environment's own generator (``self.np_random``), so calling
        ``seed(n)`` before ``reset()`` makes the draw reproducible.
        """
        # Cast to a plain int so the state is not a NumPy scalar.
        self.position = int(self.np_random.choice(self.init_positions))
        self.count = 0

        self.state = self.position
        self.reward = 0
        self.done = False
        self.info = {}

        return self.state

    def step(self, action):
        """Advance the environment by one action.

        Args:
            action: ``move_left`` or ``move_right``.  Any other value still
                consumes a step but leaves position and reward unchanged.

        Returns:
            The list ``[state, reward, done, info]``; ``info["dist"]`` holds
            the signed distance ``goal - position``.
        """
        if self.done:
            # Stepping a finished episode is a caller error; report and
            # return the last known values.
            print("Episode Done!")
        elif self.count == self.max_steps:
            # Step budget exhausted: end the episode without moving.
            self.done = True
        else:
            self.count += 1
            self._apply_move(action)

        self.state = self.position
        self.info["dist"] = self.goal - self.position

        # Explicit check instead of `assert`, which is stripped under -O.
        if not self.observation_space.contains(self.state):
            print("INVALID STATE", self.state)

        return [self.state, self.reward, self.done, self.info]

    def _apply_move(self, action):
        """Move one cell for ``action`` and update ``reward`` and ``done``."""
        if action == self.move_left:
            delta, wall = -1, self.LF_MIN
        elif action == self.move_right:
            delta, wall = 1, self.RT_MAX
        else:
            # Unknown action: leave position and reward untouched.
            return

        # A move into a wall leaves the position where it is.
        if self.position != wall:
            self.position += delta

        # Positive when the agent is past the goal in its travel direction,
        # negative when the goal is still ahead, zero when on the goal.
        overshoot = (self.position - self.goal) * delta
        if overshoot == 0:
            self.reward = self.reward_goal
            self.done = True
        elif overshoot > 0:
            self.reward = self.reward_away
        else:
            self.reward = self.reward_step

    def render(self, mode="human"):
        """Print the current position, last reward and info dict."""
        s = "position: {:2d} reward: {:2d} info: {}"
        print(s.format(self.position, self.reward, self.info))

    def seed(self, seed=None):
        """Seed the environment's random generator.

        Args:
            seed: Optional seed forwarded to ``seeding.np_random``.

        Returns:
            A one-element list containing the seed reported by
            ``seeding.np_random``.
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def close(self):
        """Nothing to release; present to complete the ``gym.Env`` API."""
        pass