From f9ee51c25660ae620afea5dff99288520b3f00ca Mon Sep 17 00:00:00 2001 From: Koha9 Date: Sat, 29 Jul 2023 22:40:03 +0900 Subject: [PATCH] =?UTF-8?q?=E5=AF=B9=E5=BA=94V3.1.6=20=E8=AE=AD=E7=BB=83?= =?UTF-8?q?=E6=A8=A1=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 主要修改SideChannel以对应V316的训练模式 规则化命名 --- Aimbot-PPO-Python/Pytorch/.idea/.gitignore | 3 + Aimbot-PPO-Python/Pytorch/.idea/Pytorch.iml | 8 ++ .../Pytorch/.idea/dictionaries/UCUNI.xml | 7 + .../inspectionProfiles/profiles_settings.xml | 6 + Aimbot-PPO-Python/Pytorch/.idea/misc.xml | 4 + Aimbot-PPO-Python/Pytorch/.idea/modules.xml | 8 ++ Aimbot-PPO-Python/Pytorch/.idea/vcs.xml | 6 + Aimbot-PPO-Python/Pytorch/AimbotEnv.py | 84 ++++++------ Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py | 129 +++++++++--------- Aimbot-PPO-Python/Pytorch/airecorder.py | 19 ++- Aimbot-PPO-Python/Pytorch/arguments.py | 2 +- Aimbot-PPO-Python/Pytorch/ppoagent.py | 3 +- 12 files changed, 166 insertions(+), 113 deletions(-) create mode 100644 Aimbot-PPO-Python/Pytorch/.idea/.gitignore create mode 100644 Aimbot-PPO-Python/Pytorch/.idea/Pytorch.iml create mode 100644 Aimbot-PPO-Python/Pytorch/.idea/dictionaries/UCUNI.xml create mode 100644 Aimbot-PPO-Python/Pytorch/.idea/inspectionProfiles/profiles_settings.xml create mode 100644 Aimbot-PPO-Python/Pytorch/.idea/misc.xml create mode 100644 Aimbot-PPO-Python/Pytorch/.idea/modules.xml create mode 100644 Aimbot-PPO-Python/Pytorch/.idea/vcs.xml diff --git a/Aimbot-PPO-Python/Pytorch/.idea/.gitignore b/Aimbot-PPO-Python/Pytorch/.idea/.gitignore new file mode 100644 index 0000000..26d3352 --- /dev/null +++ b/Aimbot-PPO-Python/Pytorch/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/Aimbot-PPO-Python/Pytorch/.idea/Pytorch.iml b/Aimbot-PPO-Python/Pytorch/.idea/Pytorch.iml new file mode 100644 index 0000000..c322a37 --- /dev/null +++ b/Aimbot-PPO-Python/Pytorch/.idea/Pytorch.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/Aimbot-PPO-Python/Pytorch/.idea/dictionaries/UCUNI.xml b/Aimbot-PPO-Python/Pytorch/.idea/dictionaries/UCUNI.xml new file mode 100644 index 0000000..0a09ad1 --- /dev/null +++ b/Aimbot-PPO-Python/Pytorch/.idea/dictionaries/UCUNI.xml @@ -0,0 +1,7 @@ + + + + aimbot + + + \ No newline at end of file diff --git a/Aimbot-PPO-Python/Pytorch/.idea/inspectionProfiles/profiles_settings.xml b/Aimbot-PPO-Python/Pytorch/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/Aimbot-PPO-Python/Pytorch/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/Aimbot-PPO-Python/Pytorch/.idea/misc.xml b/Aimbot-PPO-Python/Pytorch/.idea/misc.xml new file mode 100644 index 0000000..8093b2d --- /dev/null +++ b/Aimbot-PPO-Python/Pytorch/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/Aimbot-PPO-Python/Pytorch/.idea/modules.xml b/Aimbot-PPO-Python/Pytorch/.idea/modules.xml new file mode 100644 index 0000000..45a8e5b --- /dev/null +++ b/Aimbot-PPO-Python/Pytorch/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/Aimbot-PPO-Python/Pytorch/.idea/vcs.xml b/Aimbot-PPO-Python/Pytorch/.idea/vcs.xml new file mode 100644 index 0000000..b2bdec2 --- /dev/null +++ b/Aimbot-PPO-Python/Pytorch/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Aimbot-PPO-Python/Pytorch/AimbotEnv.py 
b/Aimbot-PPO-Python/Pytorch/AimbotEnv.py index 1a4baca..6f43799 100644 --- a/Aimbot-PPO-Python/Pytorch/AimbotEnv.py +++ b/Aimbot-PPO-Python/Pytorch/AimbotEnv.py @@ -15,19 +15,19 @@ from mlagents_envs.side_channel.side_channel import ( class Aimbot(gym.Env): def __init__( - self, - envPath: str, - workerID: int = 1, - basePort: int = 100, - side_channels: list = [] + self, + env_path: str, + worker_id: int = 1, + base_port: int = 100, + side_channels: list = [] ): super(Aimbot, self).__init__() self.env = UnityEnvironment( - file_name=envPath, + file_name=env_path, seed=1, side_channels=side_channels, - worker_id=workerID, - base_port=basePort, + worker_id=worker_id, + base_port=base_port, ) self.env.reset() # all behavior_specs @@ -41,7 +41,7 @@ class Aimbot(gym.Env): # environment action specs self.unity_action_spec = self.unity_specs.action_spec # environment sample observation - decisionSteps, _ = self.env.get_steps(self.unity_beha_name) + decision_steps, _ = self.env.get_steps(self.unity_beha_name) # OBSERVATION SPECS # environment state shape. like tuple:(93,) @@ -64,31 +64,31 @@ class Aimbot(gym.Env): # AGENT SPECS # all agents ID - self.unity_agent_IDS = decisionSteps.agent_id + self.unity_agent_IDS = decision_steps.agent_id # agents number self.unity_agent_num = len(self.unity_agent_IDS) - def reset(self)->Tuple[np.ndarray, List, List]: - """reset enviroment and get observations + def reset(self) -> Tuple[np.ndarray, List, List]: + """reset environment and get observations Returns: - ndarray: nextState, reward, done, loadDir, saveNow + ndarray: next_state, reward, done, loadDir, saveNow """ # reset env self.env.reset() - nextState, reward, done = self.get_steps() - return nextState, reward, done + next_state, reward, done = self.get_steps() + return next_state, reward, done # TODO: # delete all stack state DONE - # getstep State disassembly function DONE + # get-step State disassembly function DONE # delete agent selection function DONE # self.step action wrapper function DONE def step( - self, - actions: ndarray, - )->Tuple[np.ndarray, List, List]: - """change ations list to ActionTuple then send it to enviroment + self, + actions: ndarray, + ) -> Tuple[np.ndarray, List, List]: + """change actions list to ActionTuple then send it to environment Args: actions (ndarray): PPO chooseAction output action list.(agentNum,actionNum) @@ -96,36 +96,36 @@ class Aimbot(gym.Env): Returns: ndarray: nextState, reward, done """ - # take action to enviroment + # take action to environment # return mextState,reward,done # discrete action if self.unity_dis_act_exist: # create discrete action from actions list - discreteActions = actions[:, 0 : self.unity_discrete_type] + discrete_actions = actions[:, 0: self.unity_discrete_type] else: # create empty discrete action - discreteActions = np.asarray([[0]]) + discrete_actions = np.asarray([[0]]) # continuous action if self.unity_con_act_exist: # create continuous actions from actions list - continuousActions = actions[:, self.unity_discrete_type :] + continuous_actions = actions[:, self.unity_discrete_type:] else: # create empty continuous action - continuousActions = np.asanyarray([[0.0]]) + continuous_actions = np.asanyarray([[0.0]]) # Dummy continuous action # continuousActions = np.asanyarray([[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]]) # create actionTuple - thisActionTuple = ActionTuple(continuous=continuousActions, discrete=discreteActions) + this_action_tuple = ActionTuple(continuous=continuous_actions, discrete=discrete_actions) # 
take action to env - self.env.set_actions(behavior_name=self.unity_beha_name, action=thisActionTuple) + self.env.set_actions(behavior_name=self.unity_beha_name, action=this_action_tuple) self.env.step() # get nextState & reward & done after this action - nextStates, rewards, dones = self.get_steps() - return nextStates, rewards, dones + next_states, rewards, dones = self.get_steps() + return next_states, rewards, dones - def get_steps(self)->Tuple[np.ndarray, List, List]: - """get enviroment now observations. + def get_steps(self) -> Tuple[np.ndarray, List, List]: + """get environment now observations. Include State, Reward, Done Args: @@ -160,6 +160,7 @@ class Aimbot(gym.Env): def close(self): self.env.close() + class AimbotSideChannel(SideChannel): def __init__(self, channel_id: uuid.UUID) -> None: super().__init__(channel_id) @@ -174,13 +175,15 @@ class AimbotSideChannel(SideChannel): """ this_message = msg.read_string() this_result = this_message.split("|") - if(this_result[0] == "result"): - airecorder.total_rounds[this_result[1]]+=1 - if(this_result[2] == "Win"): - airecorder.win_rounds[this_result[1]]+=1 - #print(TotalRounds) - #print(WinRounds) - elif(this_result[0] == "Error"): + print(this_result) + if this_result[0] == "Warning": + if this_result[1] == "Result": + airecorder.total_rounds[this_result[2]] += 1 + if this_result[3] == "Win": + airecorder.win_rounds[this_result[2]] += 1 + # print(TotalRounds) + # print(WinRounds) + elif this_result[0] == "Error": print(this_message) # # while Message type is Warning # if(thisResult[0] == "Warning"): @@ -197,7 +200,8 @@ class AimbotSideChannel(SideChannel): # # while Message type is Error # elif(thisResult[0] == "Error"): # print(thisMessage) - # 发送函数 + + # 发送函数 def send_string(self, data: str) -> None: # send a string toC# msg = OutgoingMessage() @@ -222,4 +226,4 @@ class AimbotSideChannel(SideChannel): def send_float_list(self, data: List[float]) -> None: msg = OutgoingMessage() msg.write_float32_list(data) - super().queue_message_to_send(msg) \ No newline at end of file + super().queue_message_to_send(msg) diff --git a/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py b/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py index b390b6a..db157d7 100644 --- a/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py +++ b/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py @@ -32,15 +32,18 @@ if __name__ == "__main__": device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu") best_reward = -1 - # Initialize environment anget optimizer - aimbot_sidechannel = AimbotSideChannel(SIDE_CHANNEL_UUID); - env = Aimbot(envPath=args.path, workerID=args.workerID, basePort=args.baseport,side_channels=[aimbot_sidechannel]) + # Initialize environment agent optimizer + aimbot_side_channel = AimbotSideChannel(SIDE_CHANNEL_UUID) + env = Aimbot(env_path=args.path, + worker_id=args.workerID, + base_port=args.baseport, + side_channels=[aimbot_side_channel]) if args.load_dir is None: agent = PPOAgent( - env = env, + env=env, this_args=args, device=device, - ).to(device) + ).to(device) else: agent = torch.load(args.load_dir) # freeze @@ -48,7 +51,7 @@ if __name__ == "__main__": # freeze the view network for p in agent.viewNetwork.parameters(): p.requires_grad = False - print("VIEW NETWORK FREEZED") + print("VIEW NETWORK FREEZE") print("Load Agent", args.load_dir) print(agent.eval()) # optimizer @@ -57,16 +60,18 @@ if __name__ == "__main__": run_name = f"{GAME_TYPE}_{args.seed}_{int(time.time())}" wdb_recorder = WandbRecorder(GAME_NAME, GAME_TYPE, run_name, args) + 
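
[Reviewer note] The AimbotSideChannel hunk above is the core of this change set: on_message_received now expects the V3.1.6 message layout, where a finished round arrives as a pipe-delimited "Warning|Result|<TargetName>|<Win/Lose>" string instead of the old "result|<TargetName>|<Win/Lose>" form, so the counters in airecorder are updated from fields 2 and 3. A minimal standalone sketch of that parsing logic follows; the message strings are illustrative (the real ones are produced by the Unity build), and the loss token is assumed to be anything other than "Win".

    total_rounds = {"Free": 0, "Go": 0, "Attack": 0}
    win_rounds = {"Free": 0, "Go": 0, "Attack": 0}

    def handle_result_message(message: str) -> None:
        # Messages are pipe-delimited: type | subtype | target name | result.
        fields = message.split("|")
        if fields[0] == "Warning":
            if fields[1] == "Result":
                target_name = fields[2]          # "Free", "Go" or "Attack"
                total_rounds[target_name] += 1
                if fields[3] == "Win":
                    win_rounds[target_name] += 1
        elif fields[0] == "Error":
            print(message)

    handle_result_message("Warning|Result|Go|Win")    # illustrative message
    handle_result_message("Warning|Result|Go|Lose")   # illustrative message
    print(win_rounds["Go"], "/", total_rounds["Go"])  # -> 1 / 2
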
@atexit.register def save_model(): # close env env.close() if args.save_model: # save model while exit - save_dir = "../PPO-Model/"+ run_name + "_last.pt" + save_dir = "../PPO-Model/" + run_name + "_last.pt" torch.save(agent, save_dir) print("save model to " + save_dir) + # start the game total_update_step = using_targets_num * args.total_timesteps // args.datasetSize target_steps = [0 for i in range(args.target_num)] @@ -77,14 +82,14 @@ if __name__ == "__main__": ppo_memories = PPOMem( args=args, unity_agent_num=env.unity_agent_num, - device = device, + device=device, ) # MAIN LOOP: run agent in environment for total_steps in range(total_update_step): - # discunt learning rate, while step == total_update_step lr will be 0 + # discount learning rate, while step == total_update_step lr will be 0 if args.annealLR: - final_lr_ratio = args.target_lr/args.lr + final_lr_ratio = args.target_lr / args.lr frac = 1.0 - ((total_steps + 1.0) / total_update_step) lr_now = frac * args.lr optimizer.param_groups[0]["lr"] = lr_now @@ -92,13 +97,14 @@ if __name__ == "__main__": lr_now = args.lr # episode start show learning rate - print("new episode",total_steps,"learning rate = ",lr_now) - # MAIN LOOP: run agent in environment + print("new episode", total_steps, "learning rate = ", lr_now) step = 0 training = False train_queue = [] - last_reward = [0.for i in range(env.unity_agent_num)] + last_reward = [0. for i in range(env.unity_agent_num)] + # MAIN LOOP: run agent in environment while True: + # On decision point, choose action by agent if step % args.decision_period == 0: step += 1 # Choose action by agent @@ -119,17 +125,17 @@ if __name__ == "__main__": # save memories ppo_memories.save_memories( - now_step = step, - agent = agent, - state = state, - action_cpu = action_cpu, - dis_logprob_cpu = dis_logprob_cpu, - con_logprob_cpu = con_logprob_cpu, - reward = reward, - done = done, - value_cpu = value_cpu, - last_reward = last_reward, - next_done = next_done, + now_step=step, + agent=agent, + state=state, + action_cpu=action_cpu, + dis_logprob_cpu=dis_logprob_cpu, + con_logprob_cpu=con_logprob_cpu, + reward=reward, + done=done, + value_cpu=value_cpu, + last_reward=last_reward, + next_done=next_done, next_state=next_state, ) # check if any training dataset is full and ready to train @@ -137,7 +143,7 @@ if __name__ == "__main__": if ppo_memories.obs[i].size()[0] >= args.datasetSize: # start train NN train_queue.append(i) - if(len(train_queue)>0): + if len(train_queue) > 0: # break while loop and start train break # update state @@ -148,17 +154,17 @@ if __name__ == "__main__": next_state, reward, next_done = env.step(action_cpu) # save memories ppo_memories.save_memories( - now_step = step, - agent = agent, - state = state, - action_cpu = action_cpu, - dis_logprob_cpu = dis_logprob_cpu, - con_logprob_cpu = con_logprob_cpu, - reward = reward, - done = done, - value_cpu = value_cpu, - last_reward = last_reward, - next_done = next_done, + now_step=step, + agent=agent, + state=state, + action_cpu=action_cpu, + dis_logprob_cpu=dis_logprob_cpu, + con_logprob_cpu=con_logprob_cpu, + reward=reward, + done=done, + value_cpu=value_cpu, + last_reward=last_reward, + next_done=next_done, next_state=next_state, ) # update state @@ -167,12 +173,12 @@ if __name__ == "__main__": if args.train: # train mode on - mean_reward_list = [] # for WANDB - # loop all tarining queue + mean_reward_list = [] # for WANDB + # loop all training queue for this_train_ind in train_queue: - # sart time + # start time start_time = time.time() - 
target_steps[this_train_ind]+=1 + target_steps[this_train_ind] += 1 # train agent ( v_loss, @@ -180,18 +186,18 @@ if __name__ == "__main__": con_pg_loss, loss, entropy_loss - ) = agent.train_net( - this_train_ind=this_train_ind, - ppo_memories=ppo_memories, - optimizer=optimizer - ) + ) = agent.train_net( + this_train_ind=this_train_ind, + ppo_memories=ppo_memories, + optimizer=optimizer + ) # record mean reward before clear history print("done") - targetRewardMean = np.mean(ppo_memories.rewards[this_train_ind].to("cpu").detach().numpy().copy()) - mean_reward_list.append(targetRewardMean) + target_reward_mean = np.mean(ppo_memories.rewards[this_train_ind].to("cpu").detach().numpy().copy()) + mean_reward_list.append(target_reward_mean) targetName = Targets(this_train_ind).name - # clear this target trainning set buffer + # clear this target training set buffer ppo_memories.clear_training_datasets(this_train_ind) # record rewards for plotting purposes wdb_recorder.add_target_scalar( @@ -202,10 +208,10 @@ if __name__ == "__main__": con_pg_loss, loss, entropy_loss, - targetRewardMean, + target_reward_mean, target_steps, ) - print(f"episode over Target{targetName} mean reward:", targetRewardMean) + print(f"episode over Target{targetName} mean reward:", target_reward_mean) TotalRewardMean = np.mean(mean_reward_list) wdb_recorder.add_global_scalar( TotalRewardMean, @@ -216,31 +222,32 @@ if __name__ == "__main__": print("cost time:", time.time() - start_time) # New Record! if TotalRewardMean > best_reward and args.save_model: - best_reward = targetRewardMean - saveDir = "../PPO-Model/" + run_name +"_"+ str(TotalRewardMean) + ".pt" + best_reward = target_reward_mean + saveDir = "../PPO-Model/" + run_name + "_" + str(TotalRewardMean) + ".pt" torch.save(agent, saveDir) else: # train mode off - mean_reward_list = [] # for WANDB + mean_reward_list = [] # for WANDB # while not in training mode, clear the buffer for this_train_ind in train_queue: - target_steps[this_train_ind]+=1 + target_steps[this_train_ind] += 1 targetName = Targets(this_train_ind).name - targetRewardMean = np.mean(ppo_memories.rewards[this_train_ind].to("cpu").detach().numpy().copy()) - mean_reward_list.append(targetRewardMean) + target_reward_mean = np.mean(ppo_memories.rewards[this_train_ind].to("cpu").detach().numpy().copy()) + mean_reward_list.append(target_reward_mean) print(target_steps[this_train_ind]) - # clear this target trainning set buffer + # clear this target training set buffer ppo_memories.clear_training_datasets(this_train_ind) # record rewards for plotting purposes - wdb_recorder.writer.add_scalar(f"Target{targetName}/Reward", targetRewardMean, target_steps[this_train_ind]) - wdb_recorder.add_win_ratio(targetName,target_steps[this_train_ind]) - print(f"episode over Target{targetName} mean reward:", targetRewardMean) + wdb_recorder.writer.add_scalar(f"Target{targetName}/Reward", target_reward_mean, + target_steps[this_train_ind]) + wdb_recorder.add_win_ratio(targetName, target_steps[this_train_ind]) + print(f"episode over Target{targetName} mean reward:", target_reward_mean) TotalRewardMean = np.mean(mean_reward_list) wdb_recorder.writer.add_scalar("GlobalCharts/TotalRewardMean", TotalRewardMean, total_steps) - saveDir = "../PPO-Model/"+ run_name + "_last.pt" + saveDir = "../PPO-Model/" + run_name + "_last.pt" torch.save(agent, saveDir) env.close() wdb_recorder.writer.close() diff --git a/Aimbot-PPO-Python/Pytorch/airecorder.py b/Aimbot-PPO-Python/Pytorch/airecorder.py index 3cea9df..4218687 100644 --- 
a/Aimbot-PPO-Python/Pytorch/airecorder.py +++ b/Aimbot-PPO-Python/Pytorch/airecorder.py @@ -1,7 +1,6 @@ -import wandb -import time from torch.utils.tensorboard import SummaryWriter +import wandb total_rounds = {"Free": 0, "Go": 0, "Attack": 0} win_rounds = {"Free": 0, "Go": 0, "Attack": 0} @@ -35,7 +34,7 @@ class WandbRecorder: def add_target_scalar( self, target_name, - thisT, + this_t, v_loss, dis_pg_loss, con_pg_loss, @@ -46,25 +45,25 @@ class WandbRecorder: ): # fmt:off self.writer.add_scalar( - f"Target{target_name}/value_loss", v_loss.item(), target_steps[thisT] + f"Target{target_name}/value_loss", v_loss.item(), target_steps[this_t] ) self.writer.add_scalar( - f"Target{target_name}/dis_policy_loss", dis_pg_loss.item(), target_steps[thisT] + f"Target{target_name}/dis_policy_loss", dis_pg_loss.item(), target_steps[this_t] ) self.writer.add_scalar( - f"Target{target_name}/con_policy_loss", con_pg_loss.item(), target_steps[thisT] + f"Target{target_name}/con_policy_loss", con_pg_loss.item(), target_steps[this_t] ) self.writer.add_scalar( - f"Target{target_name}/total_loss", loss.item(), target_steps[thisT] + f"Target{target_name}/total_loss", loss.item(), target_steps[this_t] ) self.writer.add_scalar( - f"Target{target_name}/entropy_loss", entropy_loss.item(), target_steps[thisT] + f"Target{target_name}/entropy_loss", entropy_loss.item(), target_steps[this_t] ) self.writer.add_scalar( - f"Target{target_name}/Reward", target_reward_mean, target_steps[thisT] + f"Target{target_name}/Reward", target_reward_mean, target_steps[this_t] ) self.writer.add_scalar( - f"Target{target_name}/WinRatio", win_rounds[target_name] / total_rounds[target_name], target_steps[thisT], + f"Target{target_name}/WinRatio", win_rounds[target_name] / total_rounds[target_name], target_steps[this_t], ) # fmt:on diff --git a/Aimbot-PPO-Python/Pytorch/arguments.py b/Aimbot-PPO-Python/Pytorch/arguments.py index 78f58f4..65d8540 100644 --- a/Aimbot-PPO-Python/Pytorch/arguments.py +++ b/Aimbot-PPO-Python/Pytorch/arguments.py @@ -4,7 +4,7 @@ import uuid from distutils.util import strtobool DEFAULT_SEED = 9331 -ENV_PATH = "../Build/2.9/Goto-NonNormalization/Aimbot-ParallelEnv" +ENV_PATH = "../Build/3.1.6/Aimbot-ParallelEnv" WAND_ENTITY = "koha9" WORKER_ID = 1 BASE_PORT = 1000 diff --git a/Aimbot-PPO-Python/Pytorch/ppoagent.py b/Aimbot-PPO-Python/Pytorch/ppoagent.py index bcc041b..dcf405d 100644 --- a/Aimbot-PPO-Python/Pytorch/ppoagent.py +++ b/Aimbot-PPO-Python/Pytorch/ppoagent.py @@ -65,7 +65,8 @@ class PPOAgent(nn.Module): self.actor_mean = nn.ModuleList( [layer_init(nn.Linear(200, self.continuous_size), std=0.5) for i in range(self.target_num)] ) - # self.actor_logstd = nn.ModuleList([layer_init(nn.Linear(200, self.continuous_size), std=1) for i in range(targetNum)]) + # self.actor_logstd = + # nn.ModuleList([layer_init(nn.Linear(200, self.continuous_size), std=1) for i in range(targetNum)]) # self.actor_logstd = nn.Parameter(torch.zeros(1, self.continuous_size)) self.actor_logstd = nn.ParameterList( [nn.Parameter(torch.zeros(1, self.continuous_size)) for i in range(self.target_num)]
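
[Reviewer note] The AimbotEnv.py and MultiNN-PPO.py hunks rename the wrapper's constructor parameters to snake_case (env_path, worker_id, base_port) and update the call site accordingly, while arguments.py points ENV_PATH at the 3.1.6 build. A minimal construction sketch under those assumptions; it requires the Unity build and mlagents_envs, and the UUID is a placeholder for SIDE_CHANNEL_UUID from arguments.py.

    import uuid
    from AimbotEnv import Aimbot, AimbotSideChannel

    # Placeholder UUID; the real value must match the channel registered on the Unity side.
    side_channel = AimbotSideChannel(uuid.uuid4())
    env = Aimbot(
        env_path="../Build/3.1.6/Aimbot-ParallelEnv",  # new ENV_PATH default
        worker_id=1,
        base_port=1000,
        side_channels=[side_channel],
    )
    state, reward, done = env.reset()
    print(state.shape, env.unity_agent_num)
    env.close()
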
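
[Reviewer note] In Aimbot.step() the flat per-agent action array produced by the PPO agent is sliced into its discrete and continuous parts before being wrapped in an ActionTuple. A standalone illustration of that slicing with made-up sizes (three discrete branches followed by two continuous values per agent, eight parallel agents); the int32 cast is added here for clarity, while the wrapper passes the slices directly.

    import numpy as np
    from mlagents_envs.base_env import ActionTuple

    unity_discrete_type = 3                       # hypothetical number of discrete branches
    actions = np.zeros((8, 5), dtype=np.float32)  # (agent_num, action_num)

    discrete_actions = actions[:, 0:unity_discrete_type].astype(np.int32)
    continuous_actions = actions[:, unity_discrete_type:]
    action_tuple = ActionTuple(continuous=continuous_actions, discrete=discrete_actions)
    print(action_tuple.discrete.shape, action_tuple.continuous.shape)  # (8, 3) (8, 2)
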
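
[Reviewer note] The annealLR branch in MultiNN-PPO.py decays the learning rate linearly so that it reaches zero on the final update; note that final_lr_ratio (args.target_lr / args.lr) is computed but the schedule itself only uses frac. A worked sketch of the schedule with placeholder values; in the script the result is written into optimizer.param_groups[0]["lr"] each update.

    # Placeholder values; the real ones come from args.lr and total_update_step.
    lr = 1e-4
    total_update_step = 100

    for total_steps in range(total_update_step):
        frac = 1.0 - ((total_steps + 1.0) / total_update_step)
        lr_now = frac * lr
        # total_steps == 0  -> lr_now = 0.99 * lr
        # total_steps == 99 -> lr_now = 0.0
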
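
[Reviewer note] The airecorder.py hunk renames thisT to this_t in add_target_scalar, which turns the counters maintained by the side channel into a per-target WinRatio scalar. A small self-contained sketch of that TensorBoard write with made-up counter values and an illustrative log directory; in the script target_steps is a list indexed by the Targets enum, a dict is used here only for brevity.

    from torch.utils.tensorboard import SummaryWriter

    total_rounds = {"Free": 2, "Go": 5, "Attack": 4}    # made-up counts
    win_rounds = {"Free": 1, "Go": 4, "Attack": 1}
    target_steps = {"Free": 10, "Go": 12, "Attack": 8}  # hypothetical per-target step counters

    writer = SummaryWriter(log_dir="runs/winratio-demo")  # illustrative directory
    for target_name in ("Free", "Go", "Attack"):
        writer.add_scalar(
            f"Target{target_name}/WinRatio",
            win_rounds[target_name] / total_rounds[target_name],
            target_steps[target_name],
        )
    writer.close()
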