Source code for arch_gym.envs.TimeloopEnv

#!/usr/bin/env python3
import math
import time
import os
import gym
import multiprocessing
import numpy as np

# Resolve the directory that contains this file so that sibling modules and
# the repository's config/simulator packages can be imported below.
settings_file_path = os.path.realpath(__file__)
settings_dir_path = os.path.dirname(settings_file_path)
# Each insert goes to the front of sys.path, so the directory inserted last
# (sims/Timeloop) is searched first, then configs, then this directory.
os.sys.path.insert(0, settings_dir_path)
os.sys.path.insert(0, settings_dir_path + '/../../configs')
os.sys.path.insert(0, settings_dir_path + '/../../sims/Timeloop')

from   audioop       import mul
from   sims.Timeloop import simulate_timeloop, process_params
from   configs.sims  import Timeloop_config
from   envHelpers    import helpers

# Copied onto each TimeloopEnv instance as `max_episode_len`; the visible
# environment code stores it but does not otherwise read it.
MAX_EPISODE_LENGTH = 10
# Copied onto each TimeloopEnv instance as `max_steps`; likewise only stored.
MAX_STEPS = 100

class TimeloopEnv(gym.Env):
    '''Gym environment that scores architecture parameters with Timeloop.

    An action is a set of architecture parameters. Each step hands the action
    to ``simulate_timeloop.simulate_timeloop`` and uses the returned
    ``[energy, area, cycles]`` triple as the observation. Every step ends the
    episode (``done`` is always ``True``).

    The value returned by :meth:`calculate_reward` measures how far the
    observation exceeds the target values, so smaller values mean the
    observation is closer to (or below) the targets.
    '''

    def __init__(self, script_dir=None, output_dir=None, arch_dir=None,
                 mapper_dir=None, workload_dir=None, target_val=None,
                 num_cores=None, reward_formulation=None):
        '''Build the action/observation spaces and resolve the Timeloop paths.

        Args:
            script_dir: Timeloop script directory; defaults to
                ``Timeloop_config.timeloop_scriptdir``.
            output_dir: Timeloop output directory; defaults to
                ``Timeloop_config.timeloop_outputdir``.
            arch_dir: Timeloop architecture directory; defaults to
                ``Timeloop_config.timeloop_archdir``.
            mapper_dir: Timeloop mapper directory; defaults to
                ``Timeloop_config.timeloop_mapperdir``.
            workload_dir: Timeloop workload directory; defaults to
                ``Timeloop_config.timeloop_workloaddir``.
            target_val: Indexable ``(energy, area, cycles)`` targets; defaults
                to the three target values in ``Timeloop_config``.
            num_cores: Core count; defaults to
                ``Timeloop_config.timeloop_numcores``. It is divided by 8 to
                obtain the worker count for batch runs.
            reward_formulation: One of ``'energy'``, ``'area'``,
                ``'latency'`` or ``'joint'``; any other value makes
                :meth:`calculate_reward` return its constant baseline.
        '''
        super().__init__()

        param_obj = process_params.TimeloopConfigParams(
            Timeloop_config.timeloop_parameters)
        param_sizes = param_obj.get_param_size()

        # One discrete choice per architecture parameter.
        self.action_space = gym.spaces.MultiDiscrete(param_sizes)
        # Observation is [energy, area, cycles]; -1 is the value that
        # calculate_reward treats as a failed Timeloop run.
        self.observation_space = gym.spaces.Box(
            low=-1, high=1e10, shape=(3,))

        self.steps = 0
        self.episode = 0
        self.max_steps = MAX_STEPS
        self.max_episode_len = MAX_EPISODE_LENGTH

        self.reward_formulation = reward_formulation
        print("Reward formulation: ", self.reward_formulation)

        # Fall back to the packaged configuration for anything not supplied.
        self.timeloop_script = (
            Timeloop_config.timeloop_scriptdir
            if script_dir is None else script_dir)
        self.timeloop_output = (
            Timeloop_config.timeloop_outputdir
            if output_dir is None else output_dir)
        self.timeloop_arch = (
            Timeloop_config.timeloop_archdir
            if arch_dir is None else arch_dir)
        self.timeloop_mapper = (
            Timeloop_config.timeloop_mapperdir
            if mapper_dir is None else mapper_dir)
        self.timeloop_workload = (
            Timeloop_config.timeloop_workloaddir
            if workload_dir is None else workload_dir)

        if target_val is None:
            self.target_val = np.array([Timeloop_config.target_energy,
                                        Timeloop_config.target_area,
                                        Timeloop_config.target_cycles])
        else:
            self.target_val = target_val

        if num_cores is None:
            self.cores = int(Timeloop_config.timeloop_numcores)
        else:
            self.cores = num_cores
        self.cores = self.cores // 8  # 8 threads per timeloop run

        self.cumulative_reward = 0
        self.helpers = helpers()

        # Batch mode directories, indexed by agent id.
        self.timeloop_script_batch = []
        self.timeloop_output_batch = []
        self.timeloop_arch_batch = []

    def step(self, action_params):
        '''Take an action in a timestep.

        Assumes that the action is the modified architecture parameters.

        Args:
            action_params: Architecture parameters forwarded to Timeloop.

        Returns:
            ``(obs, reward, done, info)`` where ``obs`` is the
            ``[energy, area, cycles]`` array, ``done`` is always ``True`` and
            ``info`` is an empty dict.
        '''
        self.steps += 1
        obs = self.run_timeloop(action_params)
        reward = self.calculate_reward(obs)
        self.cumulative_reward += reward
        done = True
        return obs, reward, done, {}

    def step_multiagent(self, action_params):
        '''Take one action for multiple agents in each timestep.

        Args:
            action_params: Sequence with one set of architecture parameters
                per agent.

        Returns:
            ``(obs_batch, rewards, done, info)`` with one observation and one
            reward per agent, ``done`` always ``True`` and an empty ``info``.
        '''
        self.steps += 1

        # Create per-agent copies of the script/output/arch directories.
        for agent_id in range(len(action_params)):
            script, output, arch = self.helpers.create_timeloop_dirs(
                agent_id, self.timeloop_script, self.timeloop_output,
                self.timeloop_arch)
            # Store by agent index instead of blindly appending, so calling
            # this method repeatedly without reset_multiagent() neither grows
            # the lists without bound nor leaves stale entries at the indices
            # that run_timeloop_batch() reads.
            self._store_agent_dir(self.timeloop_script_batch, agent_id, script)
            self._store_agent_dir(self.timeloop_output_batch, agent_id, output)
            self._store_agent_dir(self.timeloop_arch_batch, agent_id, arch)

        obs_batch = self.run_timeloop_batch(action_params)

        # Calculate the reward for each agent.
        rewards = []
        for obs in obs_batch:
            rewards.append(self.calculate_reward(obs))
            print(obs)
        print(rewards)

        done = True
        return obs_batch, rewards, done, {}

    @staticmethod
    def _store_agent_dir(batch, agent_id, path):
        '''Record ``path`` as entry ``agent_id`` of ``batch``.

        Overwrites an existing entry, or appends when ``agent_id`` is the
        next free index.
        '''
        if agent_id < len(batch):
            batch[agent_id] = path
        else:
            batch.append(path)

    def reset(self):
        '''Reset the environment and associated variables.

        Returns:
            A random sample from the observation space.
        '''
        print("Resetting Environment")
        # NOTE: the original author flagged these counters as possibly unused.
        self.steps = 0
        self.cumulative_reward = 0
        return self.observation_space.sample()

    def reset_multiagent(self):
        '''Reset the multi-agent environment and associated variables.

        Passes the per-agent directories recorded by step_multiagent() to
        ``helpers.remove_dirs`` and forgets them.

        Returns:
            A random sample from the observation space.
        '''
        self.helpers.remove_dirs(self.timeloop_script_batch)
        self.helpers.remove_dirs(self.timeloop_output_batch)
        self.helpers.remove_dirs(self.timeloop_arch_batch)

        self.timeloop_script_batch = []
        self.timeloop_output_batch = []
        self.timeloop_arch_batch = []

        # NOTE: the original author flagged these counters as possibly unused.
        self.steps = 0
        self.cumulative_reward = 0
        return self.observation_space.sample()

    def run_timeloop(self, arch_params):
        '''Invoke the timeloop scripts for a single set of parameters.

        Args:
            arch_params: Architecture parameters forwarded to Timeloop.

        Returns:
            ``np.array([energy, area, cycles])``.
        '''
        energy, area, cycles = simulate_timeloop.simulate_timeloop(
            self.timeloop_script, self.timeloop_output, self.timeloop_arch,
            self.timeloop_mapper, self.timeloop_workload, arch_params)
        return np.array([energy, area, cycles])

    def run_timeloop_batch(self, multi_arch_params):
        '''Invoke the timeloop scripts in batch mode for all agents.

        The per-agent directories must already be present in the
        ``timeloop_*_batch`` lists (step_multiagent() fills them).

        Args:
            multi_arch_params: Sequence with one set of architecture
                parameters per agent.

        Returns:
            A list with one ``np.array([energy, area, cycles])`` per agent,
            in agent order; empty when there are no agents.
        '''
        num_agents = len(multi_arch_params)
        # Explicit length check: the argument may be a NumPy array, whose
        # truth value is ambiguous. Nothing to simulate for zero agents.
        if num_agents == 0:
            return []

        pool_params = [
            (self.timeloop_script_batch[agent],
             self.timeloop_output_batch[agent],
             self.timeloop_arch_batch[agent],
             self.timeloop_mapper,
             self.timeloop_workload,
             multi_arch_params[agent])
            for agent in range(num_agents)
        ]

        # cores // 8 is 0 on machines with fewer than 8 cores, which Pool
        # rejects, so always use at least one worker. The context manager
        # shuts the workers down once every result has been collected.
        with multiprocessing.Pool(max(1, self.cores)) as pool:
            results = pool.starmap(
                simulate_timeloop.simulate_timeloop, pool_params)

        return [np.array([energy, area, cycles])
                for energy, area, cycles in results]

    def _excess_over_target(self, obs, index):
        '''Return the relative overshoot of ``obs[index]`` above its target.

        Clamped at 0 when the observation is at or below the target.
        '''
        target = self.target_val[index]
        return max((obs[index] - target) / target, 0)

    def calculate_reward(self, obs):
        '''Calculate the reward based on the current observation.

        Args:
            obs: Indexable ``(energy, area, cycles)`` observation.

        Returns:
            ``500 * len(self.target_val)`` when any metric equals ``-1.0``
            (timeloop failed). Otherwise the relative overshoot of the
            selected metric for ``'energy'``, ``'area'`` or ``'latency'``, or
            ``1e-3`` plus the sum of all three overshoots for ``'joint'``.
            Any other formulation yields ``1e-3``. A result of exactly 0 is
            replaced by ``1e-5``.
        '''
        if obs[0] == -1.0 or obs[1] == -1.0 or obs[2] == -1.0:
            # Return a very high number since timeloop failed. Assumes you
            # can achieve at max 99% improvement on each metric.
            return 500 * len(self.target_val)

        reward = 1e-3
        if self.reward_formulation == 'energy':
            reward = self._excess_over_target(obs, 0)
        elif self.reward_formulation == 'area':
            reward = self._excess_over_target(obs, 1)
        elif self.reward_formulation == 'latency':
            reward = self._excess_over_target(obs, 2)
        elif self.reward_formulation == 'joint':
            # Accumulate in metric order on top of the 1e-3 baseline.
            for index in range(3):
                reward += self._excess_over_target(obs, index)

        # Some algorithms (ACO) throw an error if the reward is 0, so
        # substitute a very small number.
        if reward == 0:
            reward = 1e-5

        return reward