#!/usr/bin/env python3
import math
import time
import os
import gym
import multiprocessing
import numpy as np
# Resolve the directory holding this file so the project packages can be
# located relative to it, independent of the current working directory.
settings_file_path = os.path.realpath(__file__)
settings_dir_path = os.path.dirname(settings_file_path)

# Prepend the search paths in one step. The resulting lookup order is:
# the Timeloop simulator directory, the configs directory, then this
# file's own directory.
os.sys.path[:0] = [
    settings_dir_path + '/../../sims/Timeloop',
    settings_dir_path + '/../../configs',
    settings_dir_path,
]
from audioop import mul
from sims.Timeloop import simulate_timeloop, process_params
from configs.sims import Timeloop_config
from envHelpers import helpers
# Episode-length cap; TimeloopEnv.__init__ stores it as `max_episode_len`.
MAX_EPISODE_LENGTH = 10
# Step cap; TimeloopEnv.__init__ stores it as `max_steps`.
MAX_STEPS = 100
# (Stray "[docs]" link marker from the rendered documentation removed; it is not part of the module.)
class TimeloopEnv(gym.Env):
    '''Gym environment that evaluates architecture parameters with Timeloop.

    An action is a set of architecture parameters drawn from a
    ``MultiDiscrete`` space whose sizes come from
    ``TimeloopConfigParams.get_param_size()``.  An observation is the array
    ``[energy, area, cycles]`` returned by ``simulate_timeloop``.  Every
    step is terminal (``done`` is always ``True``).
    '''

    # Index of each single-metric reward formulation inside an observation.
    _METRIC_INDEX = {'energy': 0, 'area': 1, 'latency': 2}

    def __init__(self, script_dir=None, output_dir=None, arch_dir=None,
                 mapper_dir=None, workload_dir=None, target_val=None,
                 num_cores=None, reward_formulation=None):
        '''Set up the spaces, Timeloop directories and reward settings.

        Any directory argument left as ``None`` falls back to the matching
        ``Timeloop_config`` attribute.

        Args:
            script_dir: Timeloop script directory.
            output_dir: Timeloop output directory.
            arch_dir: Timeloop architecture directory.
            mapper_dir: Timeloop mapper directory.
            workload_dir: Timeloop workload directory.
            target_val: Targets indexed as ``[energy, area, cycles]``;
                defaults to the targets in ``Timeloop_config``.
            num_cores: Available core count; defaults to
                ``Timeloop_config.timeloop_numcores``.
            reward_formulation: One of ``'energy'``, ``'area'``,
                ``'latency'`` or ``'joint'``.  Any other value makes
                ``calculate_reward`` return its 1e-3 baseline.
        '''
        super().__init__()

        param_obj = process_params.TimeloopConfigParams(
            Timeloop_config.timeloop_parameters)
        param_sizes = param_obj.get_param_size()

        self.action_space = gym.spaces.MultiDiscrete(param_sizes)
        self.observation_space = gym.spaces.Box(
            low=-1, high=1e10, shape=(3,))

        self.steps = 0
        self.episode = 0
        self.max_steps = MAX_STEPS
        self.max_episode_len = MAX_EPISODE_LENGTH

        self.reward_formulation = reward_formulation
        print("Reward formulation: ", self.reward_formulation)

        # Explicit arguments win; otherwise use the configured defaults.
        self.timeloop_script = (
            Timeloop_config.timeloop_scriptdir
            if script_dir is None else script_dir)
        self.timeloop_output = (
            Timeloop_config.timeloop_outputdir
            if output_dir is None else output_dir)
        self.timeloop_arch = (
            Timeloop_config.timeloop_archdir
            if arch_dir is None else arch_dir)
        self.timeloop_mapper = (
            Timeloop_config.timeloop_mapperdir
            if mapper_dir is None else mapper_dir)
        self.timeloop_workload = (
            Timeloop_config.timeloop_workloaddir
            if workload_dir is None else workload_dir)

        if target_val is None:
            target_val = np.array([Timeloop_config.target_energy,
                                   Timeloop_config.target_area,
                                   Timeloop_config.target_cycles])
        self.target_val = target_val

        if num_cores is None:
            num_cores = int(Timeloop_config.timeloop_numcores)
        # 8 threads per timeloop run.  Clamp to at least one worker: with
        # fewer than 8 cores the floor division gives 0, and
        # multiprocessing.Pool(0) raises ValueError.
        self.cores = max(1, num_cores // 8)

        self.cumulative_reward = 0
        self.helpers = helpers()

        # Batch mode directories
        self.timeloop_script_batch = []
        self.timeloop_output_batch = []
        self.timeloop_arch_batch = []

    def step(self, action_params):
        '''Take an action in a timestep.

        Assumes that the action is the modified architecture parameters.

        Returns:
            ``(obs, reward, done, info)`` where ``done`` is always ``True``
            and ``info`` is an empty dict.
        '''
        self.steps += 1
        obs = self.run_timeloop(action_params)
        reward = self.calculate_reward(obs)
        self.cumulative_reward += reward
        done = True
        return obs, reward, done, {}

    def step_multiagent(self, action_params):
        '''Take one action for multiple agents in each timestep.

        Args:
            action_params: One set of architecture parameters per agent.

        Returns:
            ``(obs_batch, rewards, done, info)`` with one observation and
            one reward per agent; ``done`` is always ``True`` and ``info``
            is an empty dict.
        '''
        self.steps += 1

        # Create copies of all the directories, one set per agent.
        # NOTE(review): the batch lists are only cleared by
        # reset_multiagent(), so they grow if this is called repeatedly
        # without a reset in between - confirm that callers reset.
        for agent_id in range(len(action_params)):
            script, output, arch = self.helpers.create_timeloop_dirs(
                agent_id, self.timeloop_script, self.timeloop_output,
                self.timeloop_arch)
            self.timeloop_script_batch.append(script)
            self.timeloop_output_batch.append(output)
            self.timeloop_arch_batch.append(arch)

        obs_batch = self.run_timeloop_batch(action_params)

        # Calculate the reward for each agent
        rewards = [self.calculate_reward(obs) for obs in obs_batch]

        # Debug output: last observation and all rewards.  Guarded so an
        # empty batch no longer fails on an undefined loop variable.
        if obs_batch:
            print(obs_batch[-1])
        print(rewards)

        done = True
        return obs_batch, rewards, done, {}

    def _reset_counters(self):
        '''Zero the step counter and cumulative reward; return a sampled
        observation.'''
        self.steps = 0
        self.cumulative_reward = 0
        return self.observation_space.sample()

    def reset(self):
        '''Reset the environment and associated variables.

        Returns:
            A sample drawn from the observation space.
        '''
        print("Resetting Environment")
        return self._reset_counters()

    def reset_multiagent(self):
        '''Resets the multi-agent environment and associated variables.

        Removes the per-agent batch directories, clears the batch lists,
        and returns a sample drawn from the observation space.
        '''
        self.helpers.remove_dirs(self.timeloop_script_batch)
        self.helpers.remove_dirs(self.timeloop_output_batch)
        self.helpers.remove_dirs(self.timeloop_arch_batch)

        self.timeloop_script_batch = []
        self.timeloop_output_batch = []
        self.timeloop_arch_batch = []

        return self._reset_counters()

    def run_timeloop(self, arch_params):
        '''Invokes the timeloop scripts.

        Returns:
            ``np.array([energy, area, cycles])`` from ``simulate_timeloop``.
        '''
        energy, area, cycles = simulate_timeloop.simulate_timeloop(
            self.timeloop_script, self.timeloop_output, self.timeloop_arch,
            self.timeloop_mapper, self.timeloop_workload, arch_params)
        return np.array([energy, area, cycles])

    def run_timeloop_batch(self, multi_arch_params):
        '''Invokes the timeloop scripts in batch mode for all agents.

        Agent ``i`` uses the ``i``-th entry of each batch directory list, so
        those lists must already hold at least one entry per agent.

        Returns:
            A list with one ``np.array([energy, area, cycles])`` per agent
            (empty when no parameters are given).
        '''
        # Nothing to simulate: skip spawning worker processes entirely.
        if len(multi_arch_params) == 0:
            return []

        pool_params = [
            (self.timeloop_script_batch[agent],
             self.timeloop_output_batch[agent],
             self.timeloop_arch_batch[agent],
             self.timeloop_mapper,
             self.timeloop_workload,
             arch_params)
            for agent, arch_params in enumerate(multi_arch_params)
        ]

        # The context manager shuts the workers down once starmap has
        # returned, so repeated steps do not accumulate processes.
        with multiprocessing.Pool(self.cores) as pool:
            results = pool.starmap(
                simulate_timeloop.simulate_timeloop, pool_params)

        return [np.array([energy, area, cycles])
                for energy, area, cycles in results]

    def _relative_overshoot(self, obs, index):
        '''Relative amount by which metric ``index`` exceeds its target,
        floored at 0.'''
        target = self.target_val[index]
        return max((obs[index] - target) / target, 0)

    def calculate_reward(self, obs):
        '''
        Calculates the reward based on the current observation.

        For ``'energy'``, ``'area'`` and ``'latency'`` the reward is that
        metric's relative overshoot of its target, floored at 0.  For
        ``'joint'`` it is 1e-3 plus the sum of all three overshoots.  Any
        other formulation yields 1e-3.  A reward of exactly 0 is replaced
        by 1e-5.

        Returns:
            The reward, or ``500 * len(self.target_val)`` when any metric
            in ``obs`` equals -1.0.
        '''
        if any(obs[index] == -1.0 for index in range(3)):
            # return a very high number since timeloop failed
            return 500 * len(self.target_val)

        reward = 1e-3
        formulation = self.reward_formulation
        if formulation in self._METRIC_INDEX:
            reward = self._relative_overshoot(
                obs, self._METRIC_INDEX[formulation])
        elif formulation == 'joint':
            for index in range(3):
                reward += self._relative_overshoot(obs, index)

        # some algo (ACO) will throw error if reward is 0. So set it to a
        # very small number
        if reward == 0:
            reward = 1e-5
        return reward