diff --git a/Aimbot-PPO-Python/Pytorch/AimbotEnv.py b/Aimbot-PPO-Python/Pytorch/AimbotEnv.py
index 6f97cca..8f5bd46 100644
--- a/Aimbot-PPO-Python/Pytorch/AimbotEnv.py
+++ b/Aimbot-PPO-Python/Pytorch/AimbotEnv.py
@@ -11,6 +11,7 @@ from mlagents_envs.side_channel.side_channel import (
     IncomingMessage,
     OutgoingMessage,
 )
+from arguments import set_save_model
 
 
 class Aimbot(gym.Env):
@@ -176,18 +177,21 @@ class AimbotSideChannel(SideChannel):
             "Warning|Message1|Message2|Message3" or "Error|Message1|Message2|Message3"
         """
-        this_message = msg.read_string()
-        this_result = this_message.split("|")
-        print(this_result)
-        if this_result[0] == "Warning":
-            if this_result[1] == "Result":
-                airecorder.total_rounds[this_result[2]] += 1
-                if this_result[3] == "Win":
-                    airecorder.win_rounds[this_result[2]] += 1
+        this_message_Original = msg.read_string()
+        this_message = this_message_Original.split("|")
+        print(this_message)
+        if this_message[0] == "Warning":
+            if this_message[1] == "Result":
+                airecorder.total_rounds[this_message[2]] += 1
+                if this_message[3] == "Win":
+                    airecorder.win_rounds[this_message[2]] += 1
             # print(TotalRounds)
             # print(WinRounds)
-        elif this_result[0] == "Error":
-            print(this_message)
+            if this_message[1] == "Command":
+                set_save_model(True)
+                print("Command: " + this_message_Original)
+        elif this_message[0] == "Error":
+            print(this_message_Original)
         # # while Message type is Warning
         # if(thisResult[0] == "Warning"):
         #     # while Message1 is result means one game is over
diff --git a/Aimbot-PPO-Python/Pytorch/Archive/test2.ipynb b/Aimbot-PPO-Python/Pytorch/Archive/test2.ipynb
index f4a4a60..60f7284 100644
--- a/Aimbot-PPO-Python/Pytorch/Archive/test2.ipynb
+++ b/Aimbot-PPO-Python/Pytorch/Archive/test2.ipynb
@@ -181,30 +181,84 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "array([[0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.]])"
+       "3"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "import numpy as np\n",
-    "np.zeros((8, 4))"
+    "y=\"a;b;c\"\n",
+    "len(y.split(\";\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2]\n"
+     ]
+    }
+   ],
+   "source": [
+    "a = np.array([1,2,3,4])\n",
+    "print(a[[False,True,False,False]])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1, 2, 3, 4}"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a = {1,2,3}\n",
+    "a.add(4)\n",
+    "a"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([3, 4])"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a = np.array([[1,3],[2,4]])\n",
+    "a.max(axis=1)\n"
    ]
   }
  ],
diff --git a/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py b/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
index f32a1bd..84c56af 100644
--- a/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
+++ b/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
@@ -4,6 +4,7 @@ import random
 import uuid
 import torch
 import atexit
+import os
 
 from aimbotEnv import Aimbot
 from aimbotEnv import AimbotSideChannel
@@ -12,13 +13,14 @@ from airecorder import WandbRecorder
 from aimemory import PPOMem
 from aimemory import Targets
 from arguments import parse_args
+from arguments import set_save_model, is_save_model
 import torch.optim as optim
 
 # side channel uuid
 SIDE_CHANNEL_UUID = uuid.UUID("8bbfb62a-99b4-457c-879d-b78b69066b5e")
 # tensorboard names
-GAME_NAME = "Aimbot_Hybrid_V3"
-GAME_TYPE = "Mix_Verification"
+GAME_NAME = "Aimbot_Hybrid_Full_MNN_MultiLevel"
+GAME_TYPE = "GotoOnly-Level2345"
 
 if __name__ == "__main__":
     args = parse_args()
@@ -57,16 +59,6 @@
     run_name = f"{GAME_TYPE}_{args.seed}_{int(time.time())}"
     wdb_recorder = WandbRecorder(GAME_NAME, GAME_TYPE, run_name, args)
 
-    @atexit.register
-    def save_model():
-        # close env
-        env.close()
-        if args.save_model:
-            # save model while exit
-            save_dir = "../PPO-Model/" + run_name + "_last.pt"
-            torch.save(agent, save_dir)
-            print("save model to " + save_dir)
-
     # start the game
     total_update_step = args.target_num * args.total_timesteps // args.datasetSize
     target_steps = [0 for i in range(args.target_num)]
@@ -222,11 +214,16 @@
             )
             # print cost time as seconds
             print("cost time:", time.time() - start_time)
-            # New Record!
-            if TotalRewardMean > best_reward and args.save_model:
-                best_reward = target_reward_mean
-                saveDir = "../PPO-Model/" + run_name + "_" + str(TotalRewardMean) + ".pt"
-                torch.save(agent, saveDir)
+            # New Record! or save model
+            if ((is_save_model() or TotalRewardMean > best_reward) and args.save_model):
+                # check saveDir is exist
+                saveDir = "../PPO-Model/" + run_name + "/"
+                if not os.path.isdir(saveDir):
+                    os.mkdir(saveDir)
+                best_reward = TotalRewardMean
+                torch.save(agent, saveDir + str(TotalRewardMean) + ".pt")
+                print("Model Saved!")
+                set_save_model(False)
         else:
             # train mode off
             mean_reward_list = []  # for WANDB
@@ -249,7 +246,10 @@
 
     TotalRewardMean = np.mean(mean_reward_list)
     wdb_recorder.writer.add_scalar("GlobalCharts/TotalRewardMean", TotalRewardMean, total_steps)
-    saveDir = "../PPO-Model/" + run_name + "_last.pt"
-    torch.save(agent, saveDir)
+    saveDir = "../PPO-Model/" + run_name + "/"
+    if not os.path.isdir(saveDir):
+        os.mkdir(saveDir)
+    best_reward = target_reward_mean
+    torch.save(agent, saveDir + "_last.pt")
     env.close()
     wdb_recorder.writer.close()
diff --git a/Aimbot-PPO-Python/Pytorch/arguments.py b/Aimbot-PPO-Python/Pytorch/arguments.py
index a7b78cb..bcfe8fb 100644
--- a/Aimbot-PPO-Python/Pytorch/arguments.py
+++ b/Aimbot-PPO-Python/Pytorch/arguments.py
@@ -4,21 +4,19 @@ import uuid
 from distutils.util import strtobool
 
 DEFAULT_SEED = 9331
-ENV_PATH = "../Build/3.1.6/Aimbot-ParallelEnv"
+ENV_PATH = "../Build/3.4/Aimbot-ParallelEnv"
 WAND_ENTITY = "koha9"
 WORKER_ID = 1
 BASE_PORT = 1000
 
 # tensorboard names
-GAME_NAME = "Aimbot_Target_Hybrid_Full_MNN_V1"
-GAME_TYPE = "Mix_Train"
 
 # max round steps per agent is 2500/Decision_period, 25 seconds
 TOTAL_STEPS = 3150000
 BATCH_SIZE = 512
 MAX_TRAINNING_DATASETS = 6000
 DECISION_PERIOD = 1
-LEARNING_RATE = 6.5e-4
+LEARNING_RATE = 1.5e-4
 GAMMA = 0.99
 GAE_LAMBDA = 0.95
 EPOCHS = 3
@@ -27,18 +25,17 @@ LOSS_COEF = [1.0, 1.0, 1.0, 1.0] # free go attack defence
 POLICY_COEF = [1.0, 1.0, 1.0, 1.0]
 ENTROPY_COEF = [0.05, 0.05, 0.05, 0.05]
 CRITIC_COEF = [0.5, 0.5, 0.5, 0.5]
-TARGET_LEARNING_RATE = 1e-5
+TARGET_LEARNING_RATE = 1e-6
 FREEZE_VIEW_NETWORK = False
-BROADCASTREWARD = False
 ANNEAL_LEARNING_RATE = True
 CLIP_VLOSS = True
 NORM_ADV = False
 TRAIN = True
-SAVE_MODEL = False
-WANDB_TACK = False
+SAVE_MODEL = True
+WANDB_TACK = True
 LOAD_DIR = None
-#LOAD_DIR = "../PPO-Model/PList_Go_LeakyReLU_9331_1677965178_bestGoto/PList_Go_LeakyReLU_9331_1677965178_10.709002.pt"
+LOAD_DIR = "../PPO-Model/GotoOnly-Level1234_9331_1697122986/8.853553.pt"
 
 # Unity Environment Parameters
 TARGET_STATE_SIZE = 6
@@ -53,6 +50,16 @@ TARGETNUM= 4
 ENV_TIMELIMIT = 30
 RESULT_BROADCAST_RATIO = 1/ENV_TIMELIMIT
 
+save_model_this_episode = False
+
+def is_save_model():
+    global save_model_this_episode
+    return save_model_this_episode
+
+def set_save_model(save_model:bool):
+    print("set save model to ",save_model)
+    global save_model_this_episode
+    save_model_this_episode = save_model
+
 def parse_args():
     # fmt: off
     # pytorch and environment parameters
@@ -97,12 +104,10 @@ def parse_args():
         help="the number of steps to run in each environment per policy rollout")
     parser.add_argument("--result-broadcast-ratio", type=float, default=RESULT_BROADCAST_RATIO,
         help="broadcast result when win round is reached,r=result-broadcast-ratio*remainTime")
-    parser.add_argument("--broadCastEndReward", type=lambda x: bool(strtobool(x)), default=BROADCASTREWARD, nargs="?", const=True,
-        help="save model or not")
     # target_learning_rate
     parser.add_argument("--target-lr", type=float, default=TARGET_LEARNING_RATE,
         help="target value of downscaling the learning rate")
-
+    
     # POLICY_COEF ENTROPY_COEF CRITIC_COEF LOSS_COEF
     parser.add_argument("--policy-coef", type=float, default=POLICY_COEF,
         help="coefficient of the policy loss")
diff --git a/Aimbot-PPO-Python/Pytorch/ppoagent.py b/Aimbot-PPO-Python/Pytorch/ppoagent.py
index aa71166..74232a7 100644
--- a/Aimbot-PPO-Python/Pytorch/ppoagent.py
+++ b/Aimbot-PPO-Python/Pytorch/ppoagent.py
@@ -49,9 +49,9 @@ class PPOAgent(nn.Module):
         self.hidden_networks = nn.ModuleList(
             [
                 nn.Sequential(
-                    layer_init(nn.Linear(self.state_size, 128)),
+                    layer_init(nn.Linear(self.state_size, 256)),
                     nn.LeakyReLU(),
-                    layer_init(nn.Linear(128, 64)),
+                    layer_init(nn.Linear(256, 128)),
                     nn.LeakyReLU(),
                 )
                 for i in range(self.target_num)
@@ -59,16 +59,16 @@ class PPOAgent(nn.Module):
         )
         self.actor_dis = nn.ModuleList(
-            [layer_init(nn.Linear(64, self.discrete_size), std=0.5) for i in range(self.target_num)]
+            [layer_init(nn.Linear(128, self.discrete_size), std=0.5) for i in range(self.target_num)]
         )
         self.actor_mean = nn.ModuleList(
-            [layer_init(nn.Linear(64, self.continuous_size), std=0.5) for i in range(self.target_num)]
+            [layer_init(nn.Linear(128, self.continuous_size), std=0) for i in range(self.target_num)]
         )
         self.actor_logstd = nn.ParameterList(
             [nn.Parameter(torch.zeros(1, self.continuous_size)) for i in range(self.target_num)]
         )
         self.critic = nn.ModuleList(
-            [layer_init(nn.Linear(64, 1), std=1) for i in range(self.target_num)]
+            [layer_init(nn.Linear(128, 1), std=0) for i in range(self.target_num)]
         )
 
     def get_value(self, state: torch.Tensor):
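
Usage note (not part of the patch): a minimal sketch of the save-on-command flow these changes introduce. The side channel raises the module-level flag via set_save_model, and the training loop checks is_save_model, saves a checkpoint, and lowers the flag again. The helper names below and the os.makedirs call are illustrative assumptions, not code from the repository.

# Hypothetical sketch of the new save-model flag flow; `agent` and `run_name`
# are placeholders for the objects used in MultiNN-PPO.py.
import os
import torch

from arguments import is_save_model, set_save_model


def handle_side_channel_message(message: str) -> None:
    # AimbotSideChannel.on_message_received splits "Warning|Command|..." and
    # raises the flag instead of saving directly from the callback.
    parts = message.split("|")
    if parts[0] == "Warning" and len(parts) > 1 and parts[1] == "Command":
        set_save_model(True)


def maybe_save(agent: torch.nn.Module, run_name: str,
               reward_mean: float, best_reward: float) -> float:
    # Mirrors the saving block in MultiNN-PPO.py: save when the flag is up or a
    # new best reward is reached, then lower the flag for the next episode.
    if is_save_model() or reward_mean > best_reward:
        save_dir = "../PPO-Model/" + run_name + "/"
        os.makedirs(save_dir, exist_ok=True)  # the patch itself uses os.mkdir
        torch.save(agent, save_dir + str(reward_mean) + ".pt")
        best_reward = max(best_reward, reward_mean)
        set_save_model(False)
    return best_reward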