Parallel Environment Discrete PPO finish

Parallel Environment Discrete PPO finish. Runnable.
Koha9 2022-10-30 04:13:14 +09:00
parent 742529ccd7
commit 7497ffcb0f
7 changed files with 1087 additions and 23 deletions

.gitignore vendored
View File

@@ -76,8 +76,11 @@ crashlytics-build.properties
/Aimbot-PPO-Python/.vscode/
/Aimbot-PPO-Python/.mypy_cache/
/Aimbot-PPO-Python/__pycache__/
/Aimbot-PPO-Python/Tensorflow/__pycache__/
/Aimbot-PPO-Python/Pytorch/__pycache__/
/Aimbot-PPO-Python/Backup/
/Aimbot-PPO-Python/Build-MultiScene-WithLoad/
/Aimbot-PPO-Python/Build-CloseEnemyCut/
/Aimbot-PPO-Python/Build-ParallelEnv/
/Aimbot-PPO-Python/PPO-Model/
/Aimbot-PPO-Python/GAIL-Expert-Data/

View File

@@ -0,0 +1,161 @@
from mlagents_envs.base_env import ActionTuple
from mlagents_envs.environment import UnityEnvironment
import numpy as np
from numpy import ndarray
class makeEnv(object):
def __init__(
self,
envPath: str,
workerID: int = 1,
basePort: int = 100,
stackSize: int = 1,
stackIntercal: int = 0,
):
self.env = UnityEnvironment(
file_name=envPath,
seed=1,
side_channels=[],
worker_id=workerID,
base_port=basePort,
)
self.env.reset()
# get environment specs
self.LOAD_DIR_SIZE_IN_STATE = 3
self.TRACKED_AGENT = -1
self.BEHA_SPECS = self.env.behavior_specs
self.BEHA_NAME = list(self.BEHA_SPECS)[0]
self.SPEC = self.BEHA_SPECS[self.BEHA_NAME]
self.OBSERVATION_SPECS = self.SPEC.observation_specs[0] # observation spec
self.ACTION_SPEC = self.SPEC.action_spec # action specs
self.DISCRETE_SIZE = self.ACTION_SPEC.discrete_size
self.DISCRETE_SHAPE = list(self.ACTION_SPEC.discrete_branches)
self.CONTINUOUS_SIZE = self.ACTION_SPEC.continuous_size
self.SINGLE_STATE_SIZE = self.OBSERVATION_SPECS.shape[0] - self.LOAD_DIR_SIZE_IN_STATE
self.STATE_SIZE = self.SINGLE_STATE_SIZE * stackSize
# stacked State
self.STACK_SIZE = stackSize
self.STATE_BUFFER_SIZE = stackSize + ((stackSize - 1) * stackIntercal)
self.STACK_INDEX = list(range(0, self.STATE_BUFFER_SIZE, stackIntercal + 1))
self.statesBuffer = np.array([[0.0] * self.SINGLE_STATE_SIZE] * self.STATE_BUFFER_SIZE)
print("√√√√√Enviroment Initialized Success√√√√√")
def step(
self,
actions: list,
behaviorName: str = None,
trackedAgent: int = None,
):
"""change ations list to ActionTuple then send it to enviroment
Args:
actions (list): PPO chooseAction output action list
behaviorName (ndarray, optional): behaviorName. Defaults to None.
trackedAgent (int, optional): trackedAgentID. Defaults to None.
Returns:
ndarray: nextState, reward, done, loadDir, saveNow
"""
# send the action to the environment
# returns nextState, reward, done
if self.DISCRETE_SIZE == 0:
# create empty discrete action
discreteActions = np.asarray([[0]])
else:
# create discrete action from actions list
discreteActions = np.asanyarray([actions[0 : self.DISCRETE_SIZE]])
if self.CONTINUOUS_SIZE == 0:
# create empty continuous action
continuousActions = np.asanyarray([[0.0]])
else:
# create continuous actions from actions list
continuousActions = np.asanyarray([actions[self.DISCRETE_SIZE :]])
if behaviorName is None:
behaviorName = self.BEHA_NAME
if trackedAgent is None:
trackedAgent = self.TRACKED_AGENT
# create actionTuple
thisActionTuple = ActionTuple(continuous=continuousActions, discrete=discreteActions)
# take action to env
self.env.set_actions(behavior_name=behaviorName, action=thisActionTuple)
self.env.step()
# get nextState & reward & done after this action
nextState, reward, done, loadDir, saveNow = self.getSteps(behaviorName, trackedAgent)
return nextState, reward, done, loadDir, saveNow
def getSteps(self, behaviorName=None, trackedAgent=None):
"""get enviroment now observations.
Include State, Reward, Done, LoadDir, SaveNow
Args:
behaviorName (_type_, optional): behaviorName. Defaults to None.
trackedAgent (_type_, optional): trackedAgent. Defaults to None.
Returns:
ndarray: nextState, reward, done, loadDir, saveNow
"""
# get nextState & reward & done
if behaviorName is None:
behaviorName = self.BEHA_NAME
decisionSteps, terminalSteps = self.env.get_steps(behaviorName)
if self.TRACKED_AGENT == -1 and len(decisionSteps) >= 1:
self.TRACKED_AGENT = decisionSteps.agent_id[0]
if trackedAgent is None:
trackedAgent = self.TRACKED_AGENT
if trackedAgent in decisionSteps:  # if the game has not ended, the environment state is stored in decision_steps
nextState = decisionSteps[trackedAgent].obs[0]
nextState = np.reshape(
nextState, [self.SINGLE_STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE]
)
saveNow = nextState[-1]
loadDir = nextState[-3:-1]
nextState = nextState[:-3]
reward = decisionSteps[trackedAgent].reward
done = False
if trackedAgent in terminalSteps:  # if the game has ended, the environment state is stored in terminal_steps
nextState = terminalSteps[trackedAgent].obs[0]
nextState = np.reshape(
nextState, [self.SINGLE_STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE]
)
saveNow = nextState[-1]
loadDir = nextState[-3:-1]
nextState = nextState[:-3]
reward = terminalSteps[trackedAgent].reward
done = True
# stack state
stackedStates = self.stackStates(nextState)
return stackedStates, reward, done, loadDir, saveNow
def reset(self):
"""reset enviroment and get observations
Returns:
ndarray: nextState, reward, done, loadDir, saveNow
"""
# reset buffer
self.statesBuffer = np.array([[0.0] * self.SINGLE_STATE_SIZE] * self.STATE_BUFFER_SIZE)
# reset env
self.env.reset()
nextState, reward, done, loadDir, saveNow = self.getSteps()
return nextState, reward, done, loadDir, saveNow
def stackStates(self, state):
# shift the buffer and append the newest state
self.statesBuffer[0:-1] = self.statesBuffer[1:]
self.statesBuffer[-1] = state
# return stacked states
return np.reshape(self.statesBuffer[self.STACK_INDEX], (self.STATE_SIZE))
def render(self):
"""render enviroment"""
self.env.render()
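For reference, a minimal usage sketch of this single-agent wrapper. The module name, build path and stacking parameters below are illustrative assumptions, not part of this commit:

from makeEnv import makeEnv  # hypothetical module name for the file above

# hypothetical build path; worker ID, port and stacking values are placeholders
env = makeEnv(envPath="../Build-CloseEnemyCut/Aimbot", workerID=1, basePort=100, stackSize=3, stackIntercal=1)
state, reward, done, loadDir, saveNow = env.reset()
for _ in range(10):
    # flat action list: discrete branch choices first, then continuous values
    actions = [0] * env.DISCRETE_SIZE + [0.0] * env.CONTINUOUS_SIZE
    state, reward, done, loadDir, saveNow = env.step(actions)
env.env.close()  # makeEnv defines no close(); close the underlying UnityEnvironment directly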

View File

@@ -0,0 +1,146 @@
import gym
import numpy as np
from numpy import ndarray
from mlagents_envs.base_env import ActionTuple
from mlagents_envs.environment import UnityEnvironment
class Aimbot(gym.Env):
def __init__(
self,
envPath: str,
workerID: int = 1,
basePort: int = 100,
):
super(Aimbot, self).__init__()
self.env = UnityEnvironment(
file_name=envPath,
seed=1,
side_channels=[],
worker_id=workerID,
base_port=basePort,
)
self.env.reset()
# all behavior_specs
self.unity_specs = self.env.behavior_specs
# environment behavior name
self.unity_beha_name = list(self.unity_specs)[0]
# environment behavior spec
self.unity_specs = self.unity_specs[self.unity_beha_name]
# environment observation_space
self.unity_obs_specs = self.unity_specs.observation_specs[0]
# environment action specs
self.unity_action_spec = self.unity_specs.action_spec
# environment sample observation
decisionSteps, _ = self.env.get_steps(self.unity_beha_name)
# OBSERVATION SPECS
# environment state shape. like tuple:(93,)
self.unity_observation_shape = self.unity_obs_specs.shape
# ACTION SPECS
# environment continuous action number. int
self.unity_continuous_size = self.unity_action_spec.continuous_size
# environment discrete action shapes. list (3,3,2)
self.unity_discrete_branches = self.unity_action_spec.discrete_branches
# number of discrete action branches. int, e.g. 3
self.unity_discrete_type = self.unity_action_spec.discrete_size
# total number of discrete actions across all branches. int, e.g. 3+3+2=8
self.unity_discrete_size = sum(self.unity_discrete_branches)
# AGENT SPECS
# all agents ID
self.unity_agent_IDS = decisionSteps.agent_id
# agents number
self.unity_agent_num = len(self.unity_agent_IDS)
def reset(self):
"""reset enviroment and get observations
Returns:
ndarray: nextState, reward, done, loadDir, saveNow
"""
# reset env
self.env.reset()
nextState, reward, done = self.getSteps()
return nextState, reward, done
# TODO:
# delete all stack state DONE
# getstep State disassembly function DONE
# delete agent selection function DONE
# self.step action wrapper function DONE
def step(
self,
actions: ndarray,
):
"""change ations list to ActionTuple then send it to enviroment
Args:
actions (ndarray): PPO chooseAction output action list.(agentNum,actionNum)
Returns:
ndarray: nextState, reward, done
"""
# send the actions to the environment
# returns nextStates, rewards, dones
if self.unity_discrete_size == 0:
# create empty discrete action
discreteActions = np.asarray([[0]])
else:
# create discrete action from actions list
discreteActions = actions[:, 0 : self.unity_discrete_size]
"""
if self.unity_continuous_size == 0:
# create empty continuous action
continuousActions = np.asanyarray([[0.0]])
else:
# create continuous actions from actions list
continuousActions = actions[:,self.unity_discrete_size :]
"""
continuousActions = np.asanyarray([[0.0], [0.0], [0.0], [0.0]])
# create actionTuple
thisActionTuple = ActionTuple(continuous=continuousActions, discrete=discreteActions)
# take action to env
self.env.set_actions(behavior_name=self.unity_beha_name, action=thisActionTuple)
self.env.step()
# get nextState & reward & done after this action
nextStates, rewards, dones = self.getSteps()
return nextStates, rewards, dones
def getSteps(self):
"""get enviroment now observations.
Include State, Reward, Done
Args:
Returns:
ndarray: nextState, reward, done
"""
# get nextState & reward & done
decisionSteps, terminalSteps = self.env.get_steps(self.unity_beha_name)
nextStates = []
dones = []
rewards = []
for thisAgentID in self.unity_agent_IDS:
# when an episode ends, the agent ID appears in both decisionSteps and terminalSteps.
# to avoid duplicated states and rewards,
# use the agentExist flag to mark agents that have already been handled.
agentExist = False
# game done
if thisAgentID in terminalSteps:
nextStates.append(terminalSteps[thisAgentID].obs[0])
dones.append(True)
rewards.append(terminalSteps[thisAgentID].reward)
agentExist = True
# game not over yet and agent not in terminalSteps
if (thisAgentID in decisionSteps) and (not agentExist):
nextStates.append(decisionSteps[thisAgentID].obs[0])
dones.append(False)
rewards.append(decisionSteps[thisAgentID].reward)
return np.asarray(nextStates), rewards, dones
def close(self):
self.env.close()
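A minimal driving sketch for this parallel wrapper, assuming it is importable as AimbotEnv (the module name used by the PPO training script below) and using the Aimbot-ParallelEnv build path that appears later in this commit. step() expects one row of discrete branch choices per agent:

import numpy as np
from AimbotEnv import Aimbot  # module name as used by the training script below

env = Aimbot(envPath="../Build-ParallelEnv/Aimbot-ParallelEnv", workerID=1, basePort=2002)
states, rewards, dones = env.reset()  # states has shape (unity_agent_num, obs_dim)
# one discrete index per branch and per agent; all-zero indices are always valid
actions = np.zeros((env.unity_agent_num, env.unity_discrete_type), dtype=np.int64)
states, rewards, dones = env.step(actions)
env.close()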

View File

@@ -0,0 +1,291 @@
import argparse
import time
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
from AimbotEnv import Aimbot
from torch.distributions.normal import Normal
from torch.distributions.categorical import Categorical
from distutils.util import strtobool
from torch.utils.tensorboard import SummaryWriter
DEFAULT_SEED = 9331
ENV_PATH = "../Build-ParallelEnv/Aimbot-ParallelEnv"
WORKER_ID = 1
BASE_PORT = 2002
LEARNING_RATE = 2e-3
GAMMA = 0.99
GAE_LAMBDA = 0.95
TOTAL_STEPS = 2000000
STEP_NUM = 128
MINIBATCH_NUM = 4
EPOCHS = 4
CLIP_COEF = 0.1
ENTROPY_COEF = 0.01
CRITIC_COEF = 0.5
ANNEAL_LEARNING_RATE = True
CLIP_VLOSS = True
NORM_ADV = True
def parse_args():
# fmt: off
parser = argparse.ArgumentParser()
parser.add_argument("--seed", type=int, default=DEFAULT_SEED,
help="seed of the experiment")
parser.add_argument("--path", type=str, default=ENV_PATH,
help="enviroment path")
parser.add_argument("--workerID", type=int, default=WORKER_ID,
help="unity worker ID")
parser.add_argument("--baseport", type=int, default=BASE_PORT,
help="port to connect to Unity environment")
parser.add_argument("--lr", type=float, default=LEARNING_RATE,
help="the learning rate of optimizer")
parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
help="if toggled, cuda will be enabled by default")
parser.add_argument("--total-timesteps", type=int, default=TOTAL_STEPS,
help="total timesteps of the experiments")
parser.add_argument("--stepNum", type=int, default=STEP_NUM,
help="the number of steps to run in each environment per policy rollout")
parser.add_argument("--minibatchesNum", type=int, default=MINIBATCH_NUM,
help="the number of mini-batches")
parser.add_argument("--epochs", type=int, default=EPOCHS,
help="the K epochs to update the policy")
parser.add_argument("--annealLR", type=lambda x: bool(strtobool(x)), default=ANNEAL_LEARNING_RATE, nargs="?", const=True,
help="Toggle learning rate annealing for policy and value networks")
# GAE
parser.add_argument("--gae", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
help="Use GAE for advantage computation")
parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=NORM_ADV, nargs="?", const=True,
help="Toggles advantages normalization")
parser.add_argument("--gamma", type=float, default=GAMMA,
help="the discount factor gamma")
parser.add_argument("--gaeLambda", type=float, default=GAE_LAMBDA,
help="the lambda for the general advantage estimation")
parser.add_argument("--clip-coef", type=float, default=CLIP_COEF,
help="the surrogate clipping coefficient")
parser.add_argument("--ent-coef", type=float, default=ENTROPY_COEF,
help="coefficient of the entropy")
parser.add_argument("--critic-coef", type=float, default=CRITIC_COEF,
help="coefficient of the value function")
parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=CLIP_VLOSS, nargs="?", const=True,
help="Toggles whether or not to use a clipped loss for the value function, as per the paper.")
parser.add_argument("--max-grad-norm", type=float, default=0.5,
help="the maximum norm for the gradient clipping")
parser.add_argument("--target-kl", type=float, default=None,
help="the target KL divergence threshold")
# fmt: on
args = parser.parse_args()
return args
def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
torch.nn.init.orthogonal_(layer.weight, std)
torch.nn.init.constant_(layer.bias, bias_const)
return layer
class PPOAgent(nn.Module):
def __init__(self, env: Aimbot):
super(PPOAgent, self).__init__()
self.discrete_size = env.unity_discrete_size
self.discrete_shape = list(env.unity_discrete_branches)
self.network = nn.Sequential(
layer_init(nn.Linear(np.array(env.unity_observation_shape).prod(), 128)),
nn.Tanh(),
layer_init(nn.Linear(128, 128)),
nn.ReLU(),
layer_init(nn.Linear(128, 128)),
nn.ReLU(),
)
self.dis_Actor = layer_init(nn.Linear(128, self.discrete_size), std=0.01)
self.critic = layer_init(nn.Linear(128, 1), std=1)
def get_value(self, state: torch.Tensor):
return self.critic(self.network(state))
def get_actions_value(self, state: torch.Tensor, actions=None):
hidden = self.network(state)
dis_logits = self.dis_Actor(hidden)
split_logits = torch.split(dis_logits, self.discrete_shape, dim=1)
multi_categoricals = [Categorical(logits=thisLogits) for thisLogits in split_logits]
if actions is None:
actions = torch.stack([ctgr.sample() for ctgr in multi_categoricals])
log_prob = torch.stack(
[ctgr.log_prob(act) for act, ctgr in zip(actions, multi_categoricals)]
)
entropy = torch.stack([ctgr.entropy() for ctgr in multi_categoricals])
return actions.T, log_prob.sum(0), entropy.sum(0), self.critic(hidden)
if __name__ == "__main__":
args = parse_args()
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu")
# Initialize environment, agent and optimizer
env = Aimbot(envPath=args.path, workerID=args.workerID, basePort=args.baseport)
agent = PPOAgent(env).to(device)
optimizer = optim.Adam(agent.parameters(), lr=args.lr, eps=1e-5)
# Memory Record
obs = torch.zeros((args.stepNum, env.unity_agent_num) + env.unity_observation_shape).to(device)
actions = torch.zeros((args.stepNum, env.unity_agent_num) + (env.unity_discrete_type,)).to(
device
)
logprobs = torch.zeros((args.stepNum, env.unity_agent_num)).to(device)
rewards = torch.zeros((args.stepNum, env.unity_agent_num)).to(device)
dones = torch.zeros((args.stepNum, env.unity_agent_num)).to(device)
values = torch.zeros((args.stepNum, env.unity_agent_num)).to(device)
# TRY NOT TO MODIFY: start the game
args.batch_size = int(env.unity_agent_num * args.stepNum)
args.minibatch_size = int(args.batch_size // args.minibatchesNum)
total_update_step = args.total_timesteps // args.batch_size
global_step = 0
start_time = time.time()
next_obs, _, _ = env.reset()
next_obs = torch.Tensor(next_obs).to(device)
next_done = torch.zeros(env.unity_agent_num).to(device)
for total_steps in range(total_update_step):
# linearly anneal the learning rate; it reaches 0 when total_steps == total_update_step
if args.annealLR:
frac = 1.0 - (total_steps - 1.0) / total_update_step
lrnow = frac * args.lr
optimizer.param_groups[0]["lr"] = lrnow
# MAIN LOOP: run agent in environment
for step in range(args.stepNum):
print(step)
global_step += 1 * env.unity_agent_num
obs[step] = next_obs
dones[step] = next_done
with torch.no_grad():
# predict actions
action, logprob, _, value = agent.get_actions_value(next_obs)
value = value.flatten()
next_obs, reward, done = env.step(action.cpu().numpy())
# save memories
actions[step] = action
logprobs[step] = logprob
values[step] = value
rewards[step] = torch.tensor(reward).to(device).view(-1)
next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device)
# GAE
with torch.no_grad():
next_value = agent.get_value(next_obs).reshape(1, -1)
if args.gae:
advantages = torch.zeros_like(rewards).to(device)
lastgaelam = 0
for t in reversed(range(args.stepNum)):
if t == args.stepNum - 1:
nextnonterminal = 1.0 - next_done
nextvalues = next_value
else:
nextnonterminal = 1.0 - dones[t + 1]
nextvalues = values[t + 1]
delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t]
advantages[t] = lastgaelam = (
delta + args.gamma * args.gaeLambda * nextnonterminal * lastgaelam
)
returns = advantages + values
else:
returns = torch.zeros_like(rewards).to(device)
for t in reversed(range(args.stepNum)):
if t == args.stepNum - 1:
nextnonterminal = 1.0 - next_done
next_return = next_value
else:
nextnonterminal = 1.0 - dones[t + 1]
next_return = returns[t + 1]
returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return
advantages = returns - values
# flatten the batch
b_obs = obs.reshape((-1,) + env.unity_observation_shape)
b_logprobs = logprobs.reshape(-1)
b_actions = actions.reshape((-1,) + (env.unity_discrete_type,))
b_advantages = advantages.reshape(-1)
b_returns = returns.reshape(-1)
b_values = values.reshape(-1)
# Optimizing the policy and value network
b_inds = np.arange(args.batch_size)
clipfracs = []
for epoch in range(args.epochs):
# shuffle all datasets
np.random.shuffle(b_inds)
for start in range(0, args.batch_size, args.minibatch_size):
end = start + args.minibatch_size
mb_inds = b_inds[start:end]
mb_advantages = b_advantages[mb_inds]
# normalize advantages
if args.norm_adv:
mb_advantages = (mb_advantages - mb_advantages.mean()) / (
mb_advantages.std() + 1e-8
)
# ratio
_, newlogprob, entropy, newvalue = agent.get_actions_value(
b_obs[mb_inds], b_actions.long()[mb_inds].T
)
logratio = newlogprob - b_logprobs[mb_inds]
ratio = logratio.exp()
# early stop
with torch.no_grad():
# calculate approx_kl http://joschu.net/blog/kl-approx.html
old_approx_kl = (-logratio).mean()
approx_kl = ((ratio - 1) - logratio).mean()
clipfracs += [((ratio - 1.0).abs() > args.clip_coef).float().mean().item()]
# Policy loss
pg_loss1 = -mb_advantages * ratio
pg_loss2 = -mb_advantages * torch.clamp(
ratio, 1 - args.clip_coef, 1 + args.clip_coef
)
pg_loss = torch.max(pg_loss1, pg_loss2).mean()
# Value loss
newvalue = newvalue.view(-1)
if args.clip_vloss:
v_loss_unclipped = (newvalue - b_returns[mb_inds]) ** 2
v_clipped = b_values[mb_inds] + torch.clamp(
newvalue - b_values[mb_inds],
-args.clip_coef,
args.clip_coef,
)
v_loss_clipped = (v_clipped - b_returns[mb_inds]) ** 2
v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped)
v_loss = 0.5 * v_loss_max.mean()
else:
v_loss = 0.5 * ((newvalue - b_returns[mb_inds]) ** 2).mean()
entropy_loss = entropy.mean()
loss = pg_loss - args.ent_coef * entropy_loss + v_loss * args.critic_coef
optimizer.zero_grad()
loss.backward()
# Clips gradient norm of an iterable of parameters.
nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm)
optimizer.step()
if args.target_kl is not None:
if approx_kl > args.target_kl:
break
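For clarity, the GAE block above computes delta_t = r_t + gamma * V(s_{t+1}) * (1 - done_{t+1}) - V(s_t) and accumulates A_t = delta_t + gamma * lambda * (1 - done_{t+1}) * A_{t+1} backwards over the rollout. A standalone NumPy sketch of the same recursion (the function name and NumPy arrays are illustrative; the script itself operates on torch tensors):

import numpy as np

def compute_gae(rewards, values, dones, next_value, next_done, gamma=0.99, gae_lambda=0.95):
    """Generalized Advantage Estimation over a rollout of shape (stepNum, agentNum)."""
    step_num = rewards.shape[0]
    advantages = np.zeros_like(rewards)
    lastgaelam = 0.0
    for t in reversed(range(step_num)):
        if t == step_num - 1:
            # bootstrap from the value of the state after the rollout
            nextnonterminal = 1.0 - next_done
            nextvalues = next_value
        else:
            nextnonterminal = 1.0 - dones[t + 1]
            nextvalues = values[t + 1]
        delta = rewards[t] + gamma * nextvalues * nextnonterminal - values[t]
        lastgaelam = delta + gamma * gae_lambda * nextnonterminal * lastgaelam
        advantages[t] = lastgaelam
    returns = advantages + values
    return advantages, returns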

View File

@@ -0,0 +1,7 @@
from AimbotGym import Aimbot
ENV_PATH = "../Build-ParallelEnv/Aimbot-ParallelEnv"
WORKER_ID = 1
BASE_PORT = 2002
env = Aimbot(envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT)
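A hedged extension of this smoke test (not part of the commit), assuming this Aimbot class exposes the same unity_agent_num and unity_discrete_branches attributes as the parallel wrapper above: sample one random index per discrete branch for every agent and step a few times.

import numpy as np

states, rewards, dones = env.reset()
for _ in range(10):
    # one random choice per discrete branch, e.g. branches (3, 3, 2), for each agent
    actions = np.stack(
        [np.random.randint(0, n, size=env.unity_agent_num) for n in env.unity_discrete_branches],
        axis=1,
    )
    states, rewards, dones = env.step(actions)
env.close()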

View File

@@ -0,0 +1,453 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Action, 1 continuous ctrl 2.1\n",
"Action, 0 continuous ctrl -1.1\n"
]
}
],
"source": [
"import gym\n",
"from gym.spaces import Dict, Discrete, Box, Tuple\n",
"import numpy as np\n",
"\n",
"\n",
"class SampleGym(gym.Env):\n",
" def __init__(self, config={}):\n",
" self.config = config\n",
" self.action_space = Tuple((Discrete(2), Box(-10, 10, (2,))))\n",
" self.observation_space = Box(-10, 10, (2, 2))\n",
" self.p_done = config.get(\"p_done\", 0.1)\n",
"\n",
" def reset(self):\n",
" return self.observation_space.sample()\n",
"\n",
" def step(self, action):\n",
" chosen_action = action[0]\n",
" cnt_control = action[1][chosen_action]\n",
"\n",
" if chosen_action == 0:\n",
" reward = cnt_control\n",
" else:\n",
" reward = -cnt_control - 1\n",
"\n",
" print(f\"Action, {chosen_action} continuous ctrl {cnt_control}\")\n",
" return (\n",
" self.observation_space.sample(),\n",
" reward,\n",
" bool(np.random.choice([True, False], p=[self.p_done, 1.0 - self.p_done])),\n",
" {},\n",
" )\n",
"\n",
"\n",
"if __name__ == \"__main__\":\n",
" env = SampleGym()\n",
" env.reset()\n",
" env.step((1, [-1, 2.1])) # should say use action 1 with 2.1\n",
" env.step((0, [-1.1, 2.1])) # should say use action 0 with -1.1"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from mlagents_envs.environment import UnityEnvironment\n",
"from gym_unity.envs import UnityToGymWrapper\n",
"import numpy as np\n",
"\n",
"ENV_PATH = \"../Build-ParallelEnv/Aimbot-ParallelEnv\"\n",
"WORKER_ID = 1\n",
"BASE_PORT = 2002\n",
"\n",
"env = UnityEnvironment(\n",
" file_name=ENV_PATH,\n",
" seed=1,\n",
" side_channels=[],\n",
" worker_id=WORKER_ID,\n",
" base_port=BASE_PORT,\n",
")\n",
"\n",
"trackedAgent = 0\n",
"env.reset()\n",
"BEHA_SPECS = env.behavior_specs\n",
"BEHA_NAME = list(BEHA_SPECS)[0]\n",
"SPEC = BEHA_SPECS[BEHA_NAME]\n",
"print(SPEC)\n",
"\n",
"decisionSteps, terminalSteps = env.get_steps(BEHA_NAME)\n",
"\n",
"if trackedAgent in decisionSteps: # ゲーム終了していない場合、環境状態がdecision_stepsに保存される\n",
" nextState = decisionSteps[trackedAgent].obs[0]\n",
" reward = decisionSteps[trackedAgent].reward\n",
" done = False\n",
"if trackedAgent in terminalSteps: # ゲーム終了した場合、環境状態がterminal_stepsに保存される\n",
" nextState = terminalSteps[trackedAgent].obs[0]\n",
" reward = terminalSteps[trackedAgent].reward\n",
" done = True\n",
"print(decisionSteps.agent_id)\n",
"print(terminalSteps)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"decisionSteps.agent_id [1 2 5 7]\n",
"decisionSteps.agent_id_to_index {1: 0, 2: 1, 5: 2, 7: 3}\n",
"decisionSteps.reward [0. 0. 0. 0.]\n",
"decisionSteps.action_mask [array([[False, False, False],\n",
" [False, False, False],\n",
" [False, False, False],\n",
" [False, False, False]]), array([[False, False, False],\n",
" [False, False, False],\n",
" [False, False, False],\n",
" [False, False, False]]), array([[False, False],\n",
" [False, False],\n",
" [False, False],\n",
" [False, False]])]\n",
"decisionSteps.obs [ 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. -15.994009 1. -26.322788 1.\n",
" 1. 1. 1. 1. 1. 2.\n",
" 1. 1. 1. 1. 1. 1.\n",
" 1. 1.3519633 1.6946528 2.3051548 3.673389 9.067246\n",
" 17.521473 21.727095 22.753294 24.167128 25.905216 18.35725\n",
" 21.02278 21.053417 0. ]\n"
]
},
{
"data": {
"text/plain": [
"'decisionSteps.obs [array([[-15.994009 , 1. , -26.322788 , 1. , 1. ,\\n 1. , 1. , 1. , 1. , 2. ,\\n 1. , 1. , 1. , 1. , 1. ,\\n 1. , 1. , 1.3519633, 1.6946528, 2.3051548,\\n 3.673389 , 9.067246 , 17.521473 , 21.727095 , 22.753294 ,\\n 24.167128 , 25.905216 , 18.35725 , 21.02278 , 21.053417 ,\\n 0. ],\\n [ -1.8809433, 1. , -25.66834 , 1. , 2. ,\\n 1. , 1. , 1. , 1. , 1. ,\\n 1. , 1. , 1. , 1. , 1. ,\\n 1. , 1. , 16.768637 , 23.414627 , 22.04486 ,\\n 21.050663 , 20.486784 , 20.486784 , 21.050665 , 15.049731 ,\\n 11.578419 , 9.695194 , 20.398016 , 20.368341 , 20.398016 ,\\n...\\n 20.551746 , 20.00118 , 20.001116 , 20.551594 , 21.5222 ,\\n 17.707508 , 14.86889 , 19.914494 , 19.885508 , 19.914463 ,\\n 0. ]], dtype=float32)]'"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(\"decisionSteps.agent_id\",decisionSteps.agent_id)\n",
"# decisionSteps.agent_id [1 2 5 7]\n",
"print(\"decisionSteps.agent_id_to_index\",decisionSteps.agent_id_to_index)\n",
"# decisionSteps.agent_id_to_index {1: 0, 2: 1, 5: 2, 7: 3}\n",
"print(\"decisionSteps.reward\",decisionSteps.reward)\n",
"# decisionSteps.reward [0. 0. 0. 0.]\n",
"print(\"decisionSteps.action_mask\",decisionSteps.action_mask)\n",
"'''\n",
"decisionSteps.action_mask [array([[False, False, False],\n",
" [False, False, False],\n",
" [False, False, False],\n",
" [False, False, False]]), array([[False, False, False],\n",
" [False, False, False],\n",
" [False, False, False],\n",
" [False, False, False]]), array([[False, False],\n",
" [False, False],\n",
" [False, False],\n",
" [False, False]])]\n",
"'''\n",
"print(\"decisionSteps.obs\", decisionSteps.obs[0][0])\n",
"'''decisionSteps.obs [array([[-15.994009 , 1. , -26.322788 , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 2. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.3519633, 1.6946528, 2.3051548,\n",
" 3.673389 , 9.067246 , 17.521473 , 21.727095 , 22.753294 ,\n",
" 24.167128 , 25.905216 , 18.35725 , 21.02278 , 21.053417 ,\n",
" 0. ],\n",
" [ -1.8809433, 1. , -25.66834 , 1. , 2. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 16.768637 , 23.414627 , 22.04486 ,\n",
" 21.050663 , 20.486784 , 20.486784 , 21.050665 , 15.049731 ,\n",
" 11.578419 , 9.695194 , 20.398016 , 20.368341 , 20.398016 ,\n",
"...\n",
" 20.551746 , 20.00118 , 20.001116 , 20.551594 , 21.5222 ,\n",
" 17.707508 , 14.86889 , 19.914494 , 19.885508 , 19.914463 ,\n",
" 0. ]], dtype=float32)]'''\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from AimbotEnv import Aimbot\n",
"\n",
"ENV_PATH = \"../Build-ParallelEnv/Aimbot-ParallelEnv\"\n",
"WORKER_ID = 1\n",
"BASE_PORT = 2002\n",
"\n",
"env = Aimbot(envPath=ENV_PATH,workerID= WORKER_ID,basePort= BASE_PORT)\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([[ 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , -15.994009 , 1. , -26.322788 , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 2. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.3519633, 1.6946528,\n",
" 2.3051548, 3.673389 , 9.067246 , 17.521473 , 21.727095 ,\n",
" 22.753294 , 24.167128 , 25.905216 , 18.35725 , 21.02278 ,\n",
" 21.053417 , 0. , -15.994003 , 1. , -26.322784 ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1.3519667,\n",
" 1.6946585, 2.3051722, 3.6734192, 9.067533 , 21.145092 ,\n",
" 21.727148 , 22.753365 , 24.167217 , 25.905317 , 18.358263 ,\n",
" 21.022812 , 21.053455 , 0. ],\n",
" [ 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , -1.8809433, 1. , -25.66834 , 1. ,\n",
" 2. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 16.768637 , 23.414627 ,\n",
" 22.04486 , 21.050663 , 20.486784 , 20.486784 , 21.050665 ,\n",
" 15.049731 , 11.578419 , 9.695194 , 20.398016 , 20.368341 ,\n",
" 20.398016 , 0. , -1.8809433, 1. , -25.66834 ,\n",
" 1. , 1. , 2. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 2. ,\n",
" 2. , 1. , 1. , 1. , 25.098585 ,\n",
" 15.749494 , 22.044899 , 21.050697 , 20.486813 , 20.486813 ,\n",
" 21.050694 , 15.049746 , 3.872317 , 3.789325 , 20.398046 ,\n",
" 20.368372 , 20.398046 , 0. ],\n",
" [ 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , -13.672583 , 1. , -26.479263 , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 5.3249803, 6.401276 ,\n",
" 8.374101 , 12.8657875, 21.302414 , 21.30242 , 21.888742 ,\n",
" 22.92251 , 24.346794 , 26.09773 , 21.210114 , 21.179258 ,\n",
" 21.210117 , 0. , -13.672583 , 1. , -26.479263 ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 2. , 1. , 1. ,\n",
" 2. , 1. , 1. , 2. , 5.3249855,\n",
" 6.4012837, 8.374114 , 12.865807 , 21.302446 , 21.30245 ,\n",
" 16.168503 , 22.922543 , 24.346823 , 7.1110754, 21.210148 ,\n",
" 21.17929 , 12.495141 , 0. ],\n",
" [ 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , -4.9038744, 1. , -25.185507 , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 20.33171 , 22.859762 ,\n",
" 21.522427 , 20.551746 , 20.00118 , 20.001116 , 20.551594 ,\n",
" 21.5222 , 17.707508 , 14.86889 , 19.914494 , 19.885508 ,\n",
" 19.914463 , 0. , -4.9038773, 1. , -25.185507 ,\n",
" 1. , 2. , 1. , 2. , 1. ,\n",
" 1. , 1. , 1. , 2. , 1. ,\n",
" 1. , 1. , 1. , 1. , 15.905993 ,\n",
" 22.85977 , 11.566693 , 20.551773 , 20.00121 , 20.001146 ,\n",
" 20.551619 , 7.135157 , 17.707582 , 14.868943 , 19.914528 ,\n",
" 19.88554 , 19.914494 , 0. ]], dtype=float32),\n",
" [[-0.05], [-0.05], [-0.05], [-0.05]],\n",
" [[False], [False], [False], [False]])"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"env.unity_observation_shape\n",
"(128, 4) + env.unity_observation_shape\n",
"env.reset()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0 0 0 0]\n",
" [0 0 0 0]\n",
" [0 0 0 0]\n",
" [0 0 0 0]]\n",
"[[0]\n",
" [0]\n",
" [0]\n",
" [0]]\n",
"[[0 0 0]\n",
" [0 0 0]\n",
" [0 0 0]\n",
" [0 0 0]]\n"
]
},
{
"data": {
"text/plain": [
"([array([ 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , -15.994009 , 1. , -26.322788 , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 2. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.3519633, 1.6946528,\n",
" 2.3051548, 3.673389 , 9.067246 , 17.521473 , 21.727095 ,\n",
" 22.753294 , 24.167128 , 25.905216 , 18.35725 , 21.02278 ,\n",
" 21.053417 , 0. , -15.994003 , 1. , -26.322784 ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 2. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1.3519667,\n",
" 1.6946585, 2.3051722, 3.6734192, 9.067533 , 17.521563 ,\n",
" 21.727148 , 22.753365 , 24.167217 , 25.905317 , 18.358263 ,\n",
" 21.022812 , 21.053455 , 0. ], dtype=float32),\n",
" array([ 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , -1.8809433, 1. , -25.66834 , 1. ,\n",
" 2. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 16.768637 , 23.414627 ,\n",
" 22.04486 , 21.050663 , 20.486784 , 20.486784 , 21.050665 ,\n",
" 15.049731 , 11.578419 , 9.695194 , 20.398016 , 20.368341 ,\n",
" 20.398016 , 0. , -1.8809433, 1. , -25.66834 ,\n",
" 1. , 2. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 16.768671 ,\n",
" 23.414669 , 22.044899 , 21.050697 , 20.486813 , 20.486813 ,\n",
" 21.050694 , 15.049746 , 11.578423 , 9.695195 , 20.398046 ,\n",
" 20.368372 , 20.398046 , 0. ], dtype=float32),\n",
" array([ 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , -13.672583 , 1. , -26.479263 , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 5.3249803, 6.401276 ,\n",
" 8.374101 , 12.8657875, 21.302414 , 21.30242 , 21.888742 ,\n",
" 22.92251 , 24.346794 , 26.09773 , 21.210114 , 21.179258 ,\n",
" 21.210117 , 0. , -13.672583 , 1. , -26.479263 ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 5.3249855,\n",
" 6.4012837, 8.374114 , 12.865807 , 21.302446 , 21.30245 ,\n",
" 21.888773 , 22.922543 , 24.346823 , 26.097757 , 21.210148 ,\n",
" 21.17929 , 21.21015 , 0. ], dtype=float32),\n",
" array([ 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , -4.9038744, 1. , -25.185507 , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 20.33171 , 22.859762 ,\n",
" 21.522427 , 20.551746 , 20.00118 , 20.001116 , 20.551594 ,\n",
" 21.5222 , 17.707508 , 14.86889 , 19.914494 , 19.885508 ,\n",
" 19.914463 , 0. , -4.9038773, 1. , -25.185507 ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 20.331783 ,\n",
" 22.85977 , 21.522448 , 20.551773 , 20.00121 , 20.001146 ,\n",
" 20.551619 , 21.522217 , 17.707582 , 14.868943 , 19.914528 ,\n",
" 19.88554 , 19.914494 , 0. ], dtype=float32)],\n",
" [[-0.05], [-0.05], [-0.05], [-0.05]],\n",
" [[False], [False], [False], [False]])"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"actions = np.zeros_like(np.arange(16).reshape(4, 4))\n",
"print(actions)\n",
"env.step(actions)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.7 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long