Source code for arch_gym.envs.MasteroEnv

import os
import sys
import csv

settings_file_path = os.path.realpath(__file__)
settings_dir_path = os.path.dirname(settings_file_path)
os.sys.path.insert(0, settings_dir_path)

os.sys.path.insert(0, settings_dir_path + '/../../')

from configs.sims import Gamma_config
import gym
import glob
from gym.utils import seeding
from envHelpers import helpers

from loggers import write_csv
import numpy as np

# TODO: Have a configuration for Arch-Gym to manipulate these methods

import sys

from subprocess import Popen, PIPE
import time
import re
import numpy

import random
import pandas as pd
from math import ceil


[docs]
class MasteroEnv(gym.Env):

    def __init__(self,
                 rl_form: str = 'tdm',
                 rl_algo: str = 'ppo',
                 max_steps: int = 100,
                 num_agents: int = 1,
                 reward_formulation: str = 'latency',
                 reward_scaling: str = 'false',
                 mapping_file: str = 'mapping.csv',
                 workload: str = 'resnet18',
                 layer_id: int = 2,
                 noc_bw: int = 1073741824,
                 offchip_bw: int = 1073741824,
                 l1_size: int = 1073741824,
                 l2_size: int = 1073741824,
                 num_pe: int = 1024,
                 ):
        """Store the configuration and build the observation/action spaces.

        Args:
            rl_form: RL formulation. ``'macme'`` produces list-valued
                (per-agent) spaces; any other value produces single ``Box``
                spaces.
            rl_algo: Accepted for interface compatibility; not read here.
            max_steps: Step count at which ``step`` reports ``done``.
            num_agents: Number of agents; sizes the ``'macme'`` observation
                list.
            reward_formulation: Stored as ``self.reward_type`` and consulted
                by ``calculate_reward``.
            reward_scaling: Accepted for interface compatibility; not read
                here.
            mapping_file: Stored as ``self.mapping_file``.
            workload: Workload name handed to ``helpers.get_dimensions``.
            layer_id: Layer index handed to ``helpers.get_dimensions``.
            noc_bw: Stored as ``self.NocBW``.
            offchip_bw: Stored as ``self.offchipBW``.
            l1_size: Stored as ``self.l1_size``.
            l2_size: Stored as ``self.l2_size``.
            num_pe: Stored as ``self.num_pe``.
        """
        # Path of the simulator executable comes from the project config.
        self._executable = Gamma_config.mastero_exe_file

        # Episode bookkeeping.
        self.steps = 0
        self.episode = 0
        self.max_steps = max_steps

        # RL setup.
        self.rl_form = rl_form
        self.num_agents = num_agents
        self.reward_type = reward_formulation

        # Workload selection.
        self.mapping_file = mapping_file
        self.workload = workload
        self.layer_id = layer_id

        # Hardware parameters; `step` forwards these to the simulator run.
        self.NocBW = noc_bw
        self.offchipBW = offchip_bw
        self.l1_size = l1_size
        self.l2_size = l2_size
        self.num_pe = num_pe

        self.helpers = helpers()
        self.dimension, _ = self.helpers.get_dimensions(
            workload=self.workload, layer_id=self.layer_id)
        print("dimension: ", self.dimension)
        print("Reward Formulation", self.reward_type)

        if self.rl_form != 'macme':
            # Single-agent formulation: one continuous vector each way.
            self.observation_space = gym.spaces.Box(
                low=0, high=1, shape=(4,), dtype=np.float32)
            self.action_space = gym.spaces.Box(
                low=1, high=2, shape=(17,), dtype=np.float32)
            return

        # The same Box instance is repeated once per agent.
        per_agent_obs = gym.spaces.Box(
            low=0, high=1, shape=(4,), dtype=np.float32)
        self.observation_space = [per_agent_obs] * self.num_agents

        discrete = gym.spaces.Discrete

        def level_spaces():
            """Build the seven-entry group that occurs twice in the action list."""
            group = [
                discrete(4),  # 4-way choice, labelled "C,K,X, Y"
                discrete(2),  # S
                discrete(2),  # R
            ]
            # K, C, X, Y: one choice per value of the layer dimension.
            group.extend(
                discrete(self.dimension[axis]) for axis in ('K', 'C', 'X', 'Y'))
            return group

        self.action_space = [
            discrete(720),    # seed for permuting
            *level_spaces(),
            *level_spaces(),
            discrete(720),    # seed for permuting
            discrete(10),     # Num PE
        ]


[docs]
    def clean_sim_files(self, file_path):
        """Delete every ``*.csv`` and ``*.m`` file next to ``file_path``.

        Only the directory part of ``file_path`` is used; all matching files
        in that directory are removed, not just the one named by
        ``file_path``.

        Args:
            file_path: Path of a file whose containing directory should be
                cleaned. A bare file name (no directory part) targets the
                current working directory.
        """
        dir_path = os.path.dirname(file_path)

        for pattern in ('*.csv', '*.m'):
            # glob already returns paths that include dir_path, so they are
            # removed as-is. Re-joining them onto file_path (a file, not a
            # directory) produced non-existent paths whenever dir_path was
            # relative, so nothing was ever deleted in that case.
            for stale_file in glob.glob(os.path.join(dir_path, pattern)):
                if os.path.exists(stale_file):
                    os.remove(stale_file)



[docs]
    def step(self, action):
        """Run one simulator evaluation for ``action``.

        The action is decoded through the helper object, written out as a
        mapping file, and evaluated by the simulator executable. The
        simulator output is reshaped to a length-4 vector, which is both the
        observation and the input to ``calculate_reward``.

        When the step counter reaches ``max_steps`` the environment resets
        itself and increments the episode counter before returning.

        Args:
            action: Action in the format expected by the helper decoders for
                the configured ``rl_form``.

        Returns:
            Tuple ``(obs, reward, done, info)``. ``obs`` is the length-4
            vector, or for ``'macme'`` a list repeating one copy of it
            ``num_agents`` times. ``info`` is always an empty dict.
        """
        self.steps += 1

        # Decode the raw action into the form `write_maestro` consumes.
        if self.rl_form == 'macme':
            # TODO(Sri) implement this
            decoded = self.helpers.decode_action_list_multiagent(action)
        else:
            discretized = self.helpers.decode_action_list_rl(action, self.dimension)
            decoded = self.helpers.decode_action_list(discretized)

        # Random name for this step's mapping file.
        m_file = str(random.randint(0, 2**32))

        hw_config = dict(
            NocBW=self.NocBW,
            offchipBW=self.offchipBW,
            l1_size=self.l1_size,
            l2_size=self.l2_size,
            num_pe=self.num_pe,
        )

        # Write the mapping, then run the simulator on it.
        m_file_path = self.helpers.write_maestro(
            indv=decoded,
            workload=self.workload,
            layer_id=self.layer_id,
            m_file=m_file,
        )
        obs = self.helpers.run_maestro(self._executable, m_file, hw_config).reshape(4)
        print("obs: ", obs)

        reward = self.calculate_reward(obs)
        print("reward: ", reward)

        done = bool(self.steps == self.max_steps)
        if done:
            print("Maximum steps per episodes reached!")
            self.reset()
            self.episode += 1

        if self.rl_form == "macme":
            # One copy, referenced once per agent.
            obs = [obs.copy()] * self.num_agents

        # Remove the files generated for this step.
        self.clean_sim_files(m_file_path)

        return obs, reward, done, {}



[docs]
    def calculate_reward(self, stats):
        
        if self.reward_type == 'latency':
            # check if the obs has -1 in it
            if stats[0]<= 0:
                reward = -1
            else:
                # flatten the list 
                reward = 1/stats[0]
        elif self.reward_type == 'energy':
            if stats[2]<= 0:
                reward = -1
            else:
                reward = 1/(stats[0]*stats[2] * 1e4)
        elif self.reward_type == 'area':
            if stats[3]<= 0:
                reward = -1
            else:
                reward = 1/stats[3]
        else:
            print("FUckerQ")

        return reward



[docs]
    def reset(self):
        self.steps = 0
        # get the current directory
        file_path = os.path.dirname(os.path.realpath(__file__))

        # find wildcard csv and m files
        csv_files = [f for f in os.listdir(file_path) if f.endswith('.csv')]
        m_files = [f for f in os.listdir(file_path) if f.endswith('.m')]

        # get the file path
        file_path = os.path.dirname(os.path.realpath(__file__))
        
        # remove the files
        for csv_files in csv_files:
            csv_files = os.path.join(file_path, csv_files)
            if os.path.exists(csv_files):
                os.remove(csv_files)
        
        for m_files in m_files:
            m_files = os.path.join(file_path, m_files)
            if os.path.exists(m_files):
                os.remove(m_files)
        if self.rl_form == 'macme':
            obs = [np.zeros(self.observation_space[0].shape)] * self.num_agents
        else:
            obs = np.zeros(self.observation_space.shape)
        return obs 




# For testing
if __name__ == "__main__":
    # Smoke test: build the environment, take one random step, print the
    # resulting observation and reward.
    mapping_file = "1322331445"
    workload = "resnet18"
    noc_bw = 1073741824
    offchip_bw = 1073741824
    l1_size = 1073741824
    l2_size = 1073741824
    num_pe = 1024

    env = MasteroEnv(
        mapping_file=mapping_file,
        noc_bw=noc_bw,
        offchip_bw=offchip_bw,
        l1_size=l1_size,
        l2_size=l2_size,
        num_pe=num_pe,
        workload=workload,
    )

    dimension, _ = env.helpers.get_dimensions(workload=workload, layer_id=2)
    print("dimension: ", dimension)

    # Per-entry inclusive lower bounds and exclusive upper bounds of the
    # 17-entry action vector. A stray trailing comma used to turn `lb` into
    # a 1-tuple, which made the sampled action 2-D (1, 17) instead of (17,).
    lb = [0, 0, dimension['S'] - 1, dimension['R'] - 1, 1, 1, 1, 1,
          0, dimension['S'] - 1, dimension['R'] - 1, 1, 1, 1, 1, 0, 1]
    ub = [(2**32) - 1, 3, dimension['S'], dimension['R'],
          dimension['K'], dimension['C'], dimension['X'],
          dimension['Y'], 3, dimension['S'], dimension['R'],
          dimension['K'], dimension['C'], dimension['X'], dimension['Y'],
          (2**32) - 1, 1024]

    # Generate a random action between lb and ub. The dtype is pinned to
    # 64 bits because 2**32 - 1 does not fit a 32-bit default integer.
    action = np.random.randint(lb, ub, dtype=np.int64)

    # reset the environment
    env.reset()

    # take a step
    obs, reward, _, _ = env.step(action)

    print("obs: ", obs)
    print("reward: ", reward)

    # reset the environment
    env.reset()