# Source code for arch_gym.envs.RandomParameterEnv

import gym
import random
from gym import spaces
import numpy as np

import math


# Module-level side effect: this message is printed once, when the module is
# first imported (i.e. after the imports above have succeeded).
print("Import Successful!")

class RandomParameterEnv(gym.Env):
    """Toy gym environment whose observations and rewards are random.

    An action is a pair of parameters ``(p1, p2)``. Every step draws a fresh
    ``(1, 2)`` observation from them (see ``random_walk``) and an independent
    uniform reward in ``[0, 1)`` (see ``compute_reward``).

    Attributes:
        action_space: ``Box(-5.0, 0.0, shape=(2,))`` -- the two parameters.
        observation_space: ``Box(-1e5, 1e5, shape=(1, 2))``.
        state: Most recent observation, shape ``(1, 2)``.
        stepN: Total number of ``step`` calls since construction.
        steps: Number of ``step`` calls in the current episode.
        maxStepN: Step budget after which the episode is reset.
        episode: Number of episodes ended by exhausting the step budget.
        cum_reward: Sum of all rewards since construction.
    """

    def __init__(self, natural=False):
        """Initialize environment.

        Args:
            natural: Unused; kept so existing callers that pass it still work.
        """
        super().__init__()

        # Parameters to control: parameter 1, parameter 2.
        self.action_space = spaces.Box(-5.0, 0.0, shape=(2,))

        # Observation features: two randomly generated values.
        self.observation_space = spaces.Box(-1e5, 1e5, shape=(1, 2))

        # Placeholder state; overwritten with zeros by reset() below. The draw
        # is kept so the global NumPy RNG is consumed exactly as before.
        self.state = np.random.random_sample(size=self.observation_space.shape)

        self.stepN = 0
        self.episode = 0
        self.cum_reward = 0
        self.maxStepN = 1000
        self.steps = 0

        # Start the first game.
        self.reset()

    def step(self, action):
        """Advance the environment by one step.

        Args:
            action: Iterable of exactly two numbers ``(p1, p2)``.

        Returns:
            Tuple ``(state, reward, done, info)`` where ``state`` is a
            ``(1, 2)`` array, ``reward`` is a float in ``[0, 1)``, ``done`` is
            a bool and ``info`` holds the global step count, this step's
            reward and the episode counter.
        """
        self.stepN += 1
        self.steps += 1

        p1, p2 = action
        # Keep self.state in sync with the observation handed to the caller;
        # previously it stayed at zeros for the whole lifetime of the env.
        self.state = self.random_walk(p1, p2)
        state = self.state

        reward = self.compute_reward()
        self.cum_reward += reward

        # NOTE(review): for actions inside action_space ([-5, 0]) the first
        # component lies in [0, 25) and the second in (-10, 0], so this
        # condition holds on every step -- confirm the thresholds are intended.
        done = bool(state[0][0] <= 500 and state[0][1] <= 400)

        # Step budget exhausted: start a new episode. The observation returned
        # for this step is then the freshly reset (all-zero) state.
        if self.steps == self.maxStepN:
            self.steps = 0
            state = self.reset()
            self.episode += 1

        info = {"step": self.stepN, "reward": reward, "episode": self.episode}
        return state, reward, done, info

    def reset(self):
        """Reset the state to all zeros and return it.

        The shape is taken from ``observation_space`` so the method does not
        depend on ``self.state`` having been assigned beforehand.
        """
        self.state = np.zeros(shape=self.observation_space.shape)
        return self.state

    def compute_reward(self):
        """Return a reward drawn uniformly from ``[0, 1)``."""
        return np.random.random()

    def random_walk(self, p1, p2):
        """Draw a random ``(1, 2)`` observation from the two parameters.

        The first component is the product of ``p1`` and ``p2``, each scaled
        by its own uniform ``[0, 1)`` factor; the second is the sum of the two
        scaled parameters. Four random numbers are drawn per call.

        Args:
            p1: First parameter.
            p2: Second parameter.

        Returns:
            Array of shape ``(1, 2)``.
        """
        scaled_product = np.random.random() * p1 * np.random.random() * p2
        scaled_sum = np.random.random() * p1 + np.random.random() * p2
        return np.array([[scaled_product, scaled_sum]])

    def render(self):
        """Print the global step count and the average reward per step."""
        # Before the first step() stepN is 0, which used to raise
        # ZeroDivisionError. True division is used because every reward is in
        # [0, 1), so floor division always printed 0.0.
        mean_reward = self.cum_reward / self.stepN if self.stepN else 0.0
        print(f"Step:\t{self.stepN} \t Reward:\t{mean_reward}")