import os
import sys
import csv
settings_file_path = os.path.realpath(__file__)
settings_dir_path = os.path.dirname(settings_file_path)
os.sys.path.insert(0, settings_dir_path)
os.sys.path.insert(0, settings_dir_path + '/../../')
from configs.sims import Gamma_config
import gym
import glob
from gym.utils import seeding
from envHelpers import helpers
from loggers import write_csv
import numpy as np
# ToDo: Have a configuration for Arch-Gym to manipulate this methods
import sys
from subprocess import Popen, PIPE
import time
import re
import numpy
import random
import pandas as pd
from math import ceil
class MasteroEnv(gym.Env):
    """Gym environment that evaluates a mapping by running the MAESTRO executable.

    Each call to :meth:`step` decodes the action through ``envHelpers.helpers``,
    writes it out with ``helpers.write_maestro``, runs the executable with
    ``helpers.run_maestro`` and turns the resulting 4-element statistics
    vector into a scalar reward.

    Args:
        rl_form: ``'macme'`` selects the list-based multi-agent observation
            and action spaces; any other value selects single ``Box`` spaces.
        rl_algo: Accepted for interface compatibility; not used by this class.
        max_steps: Number of steps after which ``step`` reports ``done``.
        num_agents: Number of per-agent observation copies in ``'macme'`` mode.
        reward_formulation: One of ``'latency'``, ``'energy'`` or ``'area'``.
        reward_scaling: Accepted for interface compatibility; not used by
            this class.
        mapping_file: Stored on ``self.mapping_file``; not otherwise read here.
        workload: Workload name passed to the helpers.
        layer_id: Layer index passed to the helpers.
        noc_bw: Forwarded to the simulator as ``"NocBW"``.
        offchip_bw: Forwarded to the simulator as ``"offchipBW"``.
        l1_size: Forwarded to the simulator as ``"l1_size"``.
        l2_size: Forwarded to the simulator as ``"l2_size"``.
        num_pe: Forwarded to the simulator as ``"num_pe"``.
    """

    def __init__(self,
                 rl_form: str = 'tdm',
                 rl_algo: str = 'ppo',
                 max_steps: int = 100,
                 num_agents: int = 1,
                 reward_formulation: str = 'latency',
                 reward_scaling: str = 'false',
                 mapping_file: str = 'mapping.csv',
                 workload: str = 'resnet18',
                 layer_id: int = 2,
                 noc_bw: int = 1073741824,
                 offchip_bw: int = 1073741824,
                 l1_size: int = 1073741824,
                 l2_size: int = 1073741824,
                 num_pe: int = 1024,
                 ):
        super().__init__()

        self._executable = Gamma_config.mastero_exe_file
        self.mapping_file = mapping_file
        self.num_agents = num_agents

        # Architecture parameters handed to the simulator on every step.
        self.NocBW = noc_bw
        self.offchipBW = offchip_bw
        self.l1_size = l1_size
        self.l2_size = l2_size
        self.num_pe = num_pe

        self.max_steps = max_steps
        self.rl_form = rl_form
        self.steps = 0
        self.episode = 0
        self.workload = workload
        self.layer_id = layer_id
        self.reward_type = reward_formulation

        self.helpers = helpers()
        self.dimension, _ = self.helpers.get_dimensions(
            workload=self.workload, layer_id=self.layer_id)
        print("dimension: ", self.dimension)
        print("Reward Formulation", self.reward_type)

        if self.rl_form == 'macme':
            self.observation_space = [
                gym.spaces.Box(low=0, high=1, shape=(4,), dtype=np.float32)
            ] * self.num_agents
            self.action_space = [
                # seed for permuting
                gym.spaces.Discrete(720),
                # C,K,X, Y
                gym.spaces.Discrete(4),
                # S
                gym.spaces.Discrete(2),
                # R
                gym.spaces.Discrete(2),
                # K
                gym.spaces.Discrete(self.dimension['K']),
                # C
                gym.spaces.Discrete(self.dimension['C']),
                # X
                gym.spaces.Discrete(self.dimension['X']),
                # Y
                gym.spaces.Discrete(self.dimension['Y']),
                # C,K,X, Y
                gym.spaces.Discrete(4),
                # S
                gym.spaces.Discrete(2),
                # R
                gym.spaces.Discrete(2),
                # K
                gym.spaces.Discrete(self.dimension['K']),
                # C
                gym.spaces.Discrete(self.dimension['C']),
                # X
                gym.spaces.Discrete(self.dimension['X']),
                # Y
                gym.spaces.Discrete(self.dimension['Y']),
                # seed for permuting
                gym.spaces.Discrete(720),
                # Num PE
                gym.spaces.Discrete(10),
            ]
        else:
            self.observation_space = gym.spaces.Box(
                low=0, high=1, shape=(4,), dtype=np.float32)
            self.action_space = gym.spaces.Box(
                low=1, high=2, shape=(17,), dtype=np.float32)

    @staticmethod
    def _remove_files(paths):
        """Delete every path in ``paths`` that currently exists."""
        for path in paths:
            if os.path.exists(path):
                os.remove(path)

    def clean_sim_files(self, file_path):
        """Remove all ``*.csv`` and ``*.m`` files next to ``file_path``.

        Args:
            file_path: Path of a file inside the directory to clean; only its
                directory component is used.
        """
        dir_path = os.path.dirname(file_path)
        # glob already returns paths that include dir_path, so they are
        # removed as-is; re-joining them onto file_path (a file, not a
        # directory) produced non-existent paths for relative inputs.
        stale_files = (glob.glob(os.path.join(dir_path, '*.csv'))
                       + glob.glob(os.path.join(dir_path, '*.m')))
        self._remove_files(stale_files)

    def step(self, action):
        """Evaluate one action with the simulator.

        Args:
            action: Action to decode; handled by
                ``helpers.decode_action_list_multiagent`` in ``'macme'`` mode
                and by ``helpers.decode_action_list_rl`` followed by
                ``helpers.decode_action_list`` otherwise.

        Returns:
            A ``(obs, reward, done, info)`` tuple. ``obs`` is the simulator
            output reshaped to 4 elements (a list of ``num_agents`` copies in
            ``'macme'`` mode), ``done`` is ``True`` once ``max_steps`` steps
            have been taken, and ``info`` is an empty dict.
        """
        self.steps += 1
        done = False

        if self.rl_form == 'macme':
            # TODO(Sri) implement this
            action_decoded = self.helpers.decode_action_list_multiagent(action)
        else:
            action_discretized = self.helpers.decode_action_list_rl(
                action, self.dimension)
            action_decoded = self.helpers.decode_action_list(action_discretized)

        # Random name for this step's mapping file.
        m_file = "{}".format(random.randint(0, 2**32))

        arch_configs = {
            "NocBW": self.NocBW,
            "offchipBW": self.offchipBW,
            "l1_size": self.l1_size,
            "l2_size": self.l2_size,
            "num_pe": self.num_pe
        }

        # write the action to the file
        m_file_path = self.helpers.write_maestro(
            indv=action_decoded, workload=self.workload,
            layer_id=self.layer_id, m_file=m_file)

        # run the maestro
        obs = self.helpers.run_maestro(self._executable, m_file, arch_configs)
        obs = obs.reshape(4,)
        print("obs: ", obs)

        reward = self.calculate_reward(obs)
        print("reward: ", reward)

        if self.steps == self.max_steps:
            done = True
            print("Maximum steps per episodes reached!")
            # The environment resets itself at the end of an episode; the
            # observation returned below is still the one from this step.
            self.reset()
            self.episode += 1

        if self.rl_form == "macme":
            obs = [obs.copy()] * self.num_agents

        # clean the files
        self.clean_sim_files(m_file_path)

        return obs, reward, done, {}

    def calculate_reward(self, stats):
        """Convert simulator statistics into a scalar reward.

        Args:
            stats: Indexable statistics vector. Index 0 is used for
                ``'latency'``, indices 0 and 2 for ``'energy'`` and index 3
                for ``'area'``.

        Returns:
            ``-1`` when a statistic used by the selected formulation is not
            positive, otherwise the reciprocal of that statistic (for
            ``'energy'``: ``1 / (stats[0] * stats[2] * 1e4)``).

        Raises:
            ValueError: If ``self.reward_type`` is not a known formulation.
        """
        if self.reward_type == 'latency':
            # check if the obs has -1 in it
            if stats[0] <= 0:
                return -1
            return 1 / stats[0]

        if self.reward_type == 'energy':
            # Both factors sit in the denominator, so both must be positive.
            if stats[0] <= 0 or stats[2] <= 0:
                return -1
            return 1 / (stats[0] * stats[2] * 1e4)

        if self.reward_type == 'area':
            if stats[3] <= 0:
                return -1
            return 1 / stats[3]

        raise ValueError(
            "Unknown reward formulation {!r}; expected 'latency', 'energy' "
            "or 'area'".format(self.reward_type))

    def reset(self):
        """Reset the step counter and remove leftover simulator files.

        Deletes every ``.csv`` and ``.m`` file in the directory containing
        this module.

        Returns:
            A zero observation: one array of the observation-space shape, or
            a list of ``num_agents`` such arrays in ``'macme'`` mode.
        """
        self.steps = 0

        # get the directory of this module
        module_dir = os.path.dirname(os.path.realpath(__file__))

        # find and remove leftover csv and m files
        stale_files = [
            os.path.join(module_dir, name)
            for name in os.listdir(module_dir)
            if name.endswith(('.csv', '.m'))
        ]
        self._remove_files(stale_files)

        if self.rl_form == 'macme':
            obs = [np.zeros(self.observation_space[0].shape)] * self.num_agents
        else:
            obs = np.zeros(self.observation_space.shape)

        return obs
# For testing: build an environment, take one random step and print the result.
if __name__ == "__main__":
    # Kept for reference only; the environment takes its executable path
    # from Gamma_config, so this value is not passed anywhere.
    exe_file = "../../cost_model/maestro"
    mapping_file = "1322331445"
    workload = "resnet18"
    noc_bw = 1073741824
    offchip_bw = 1073741824
    l1_size = 1073741824
    l2_size = 1073741824
    num_pe = 1024

    env = MasteroEnv(
        mapping_file=mapping_file,
        noc_bw=noc_bw,
        offchip_bw=offchip_bw,
        l1_size=l1_size,
        l2_size=l2_size,
        num_pe=num_pe,
        workload=workload,
    )

    dimension, _ = env.helpers.get_dimensions(workload=workload, layer_id=2)
    print("dimension: ", dimension)

    # Per-element bounds for the 17 action entries. A stray trailing comma
    # after the lower-bound list used to wrap it in a 1-tuple, which made the
    # sampled action 2-D (1, 17) instead of a flat 17-element vector.
    lb = [0, 0, dimension['S']-1, dimension['R']-1, 1, 1, 1, 1,
          0, dimension['S']-1, dimension['R']-1, 1, 1, 1, 1, 0, 1]
    ub = [(2**32)-1, 3, dimension['S'], dimension['R'],
          dimension['K'], dimension['C'], dimension['X'],
          dimension['Y'], 3, dimension['S'], dimension['R'],
          dimension['K'], dimension['C'], dimension['X'], dimension['Y'],
          (2**32)-1, 1024]

    # generate random action with lower bound of lb and upper bound of ub.
    action = np.random.randint(lb, ub)

    # reset the environment
    env.reset()

    # take a step
    obs, reward, _, _ = env.step(action)
    print("obs: ", obs)
    print("reward: ", reward)

    # reset the environment
    env.reset()