diff --git a/Aimbot-PPO-Python/Pytorch/AimbotEnv.py b/Aimbot-PPO-Python/Pytorch/AimbotEnv.py
index 7c8466d..0ca1a59 100644
--- a/Aimbot-PPO-Python/Pytorch/AimbotEnv.py
+++ b/Aimbot-PPO-Python/Pytorch/AimbotEnv.py
@@ -12,12 +12,13 @@ class Aimbot(gym.Env):
         envPath: str,
         workerID: int = 1,
         basePort: int = 100,
+        side_channels: list = []
     ):
         super(Aimbot, self).__init__()
         self.env = UnityEnvironment(
             file_name=envPath,
             seed=1,
-            side_channels=[],
+            side_channels=side_channels,
             worker_id=workerID,
             base_port=basePort,
         )
diff --git a/Aimbot-PPO-Python/Pytorch/ppo.py b/Aimbot-PPO-Python/Pytorch/ppo.py
index b2c6cbf..ebd9886 100644
--- a/Aimbot-PPO-Python/Pytorch/ppo.py
+++ b/Aimbot-PPO-Python/Pytorch/ppo.py
@@ -3,6 +3,7 @@ import wandb
 import time
 import numpy as np
 import random
+import uuid
 import torch
 import torch.nn as nn
 import torch.optim as optim
@@ -13,22 +14,31 @@ from torch.distributions.normal import Normal
 from torch.distributions.categorical import Categorical
 from distutils.util import strtobool
 from torch.utils.tensorboard import SummaryWriter
+from mlagents_envs.environment import UnityEnvironment
+from mlagents_envs.side_channel.side_channel import (
+    SideChannel,
+    IncomingMessage,
+    OutgoingMessage,
+)
+from typing import List
 
 bestReward = 0
 
 DEFAULT_SEED = 9331
-ENV_PATH = "../Build/Build-ParallelEnv-BigArea-6Enemy-EndBonus/Aimbot-ParallelEnv"
+ENV_PATH = "../Build/Build-ParallelEnv-Target-OffPolicy-SingleStack-SideChannel/Aimbot-ParallelEnv"
+SIDE_CHANNEL_UUID = uuid.UUID("8bbfb62a-99b4-457c-879d-b78b69066b5e")
 WAND_ENTITY = "koha9"
 WORKER_ID = 1
 BASE_PORT = 1000
 
-# max round steps per agent is 2500, 25 seconds
+# max round steps per agent is 2500 / DECISION_PERIOD, i.e. 25 seconds
+# !!! check every parameter before running !!!
 TOTAL_STEPS = 4000000
 BATCH_SIZE = 512
 MAX_TRAINNING_DATASETS = 8000
-DECISION_PERIOD = 2
-LEARNING_RATE = 7e-4
+DECISION_PERIOD = 1
+LEARNING_RATE = 1e-3
 GAMMA = 0.99
 GAE_LAMBDA = 0.95
 EPOCHS = 4
@@ -37,14 +47,19 @@ POLICY_COEF = 1.0
 ENTROPY_COEF = 0.01
 CRITIC_COEF = 0.5
 
-ANNEAL_LEARNING_RATE = False
+ANNEAL_LEARNING_RATE = True
 CLIP_VLOSS = True
 NORM_ADV = True
-TRAIN = False
+TRAIN = True
 
-WANDB_TACK = False
+WANDB_TACK = True
 LOAD_DIR = None
-LOAD_DIR = "../PPO-Model/bigArea-4.pt"
+LOAD_DIR = "../PPO-Model/Aimbot-target-last.pt"
+
+# public data
+TotalRounds = {"Go":0,"Attack":0,"Free":0}
+WinRounds = {"Go":0,"Attack":0,"Free":0}
+
 
 def parse_args():
     # fmt: off
@@ -127,9 +142,11 @@ class PPOAgent(nn.Module):
         self.continuous_size = env.unity_continuous_size
 
         self.network = nn.Sequential(
-            layer_init(nn.Linear(np.array(env.unity_observation_shape).prod(), 384)),
+            layer_init(nn.Linear(np.array(env.unity_observation_shape).prod(), 700)),
             nn.ReLU(),
-            layer_init(nn.Linear(384, 256)),
+            layer_init(nn.Linear(700, 500)),
+            nn.ReLU(),
+            layer_init(nn.Linear(500, 256)),
             nn.ReLU(),
         )
         self.actor_dis = layer_init(nn.Linear(256, self.discrete_size), std=0.01)
@@ -213,6 +230,52 @@ def GAE(agent, args, rewards, dones, values, next_obs, next_done):
     advantages = returns - values
     return advantages, returns
 
+class AimbotSideChannel(SideChannel):
+    def __init__(self, channel_id: uuid.UUID) -> None:
+        super().__init__(channel_id)
+    def on_message_received(self, msg: IncomingMessage) -> None:
+        """
+        Note: We must implement this method of the SideChannel interface to
+        receive messages from Unity
+        """
+        thisMessage = msg.read_string()
+        print(thisMessage)
+        thisResult = thisMessage.split("|")
+        if(thisResult[0] == "result"):
+            TotalRounds[thisResult[1]]+=1
+            if(thisResult[2] == "Win"):
"Win"): + WinRounds[thisResult[1]]+=1 + print(TotalRounds) + print(WinRounds) + elif(thisResult[0] == "Error"): + print(thisMessage) + # 发送函数 + def send_string(self, data: str) -> None: + """发送一个字符串给C#""" + msg = OutgoingMessage() + msg.write_string(data) + super().queue_message_to_send(msg) + + def send_bool(self, data: bool) -> None: + msg = OutgoingMessage() + msg.write_bool(data) + super().queue_message_to_send(msg) + + def send_int(self, data: int) -> None: + msg = OutgoingMessage() + msg.write_int32(data) + super().queue_message_to_send(msg) + + def send_float(self, data: float) -> None: + msg = OutgoingMessage() + msg.write_float32(data) + super().queue_message_to_send(msg) + + def send_float_list(self, data: List[float]) -> None: + msg = OutgoingMessage() + msg.write_float32_list(data) + super().queue_message_to_send(msg) + if __name__ == "__main__": args = parse_args() @@ -223,7 +286,8 @@ if __name__ == "__main__": device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu") # Initialize environment anget optimizer - env = Aimbot(envPath=args.path, workerID=args.workerID, basePort=args.baseport) + aimBotsideChannel = AimbotSideChannel(SIDE_CHANNEL_UUID); + env = Aimbot(envPath=args.path, workerID=args.workerID, basePort=args.baseport,side_channels=[aimBotsideChannel]) if args.load_dir is None: agent = PPOAgent(env).to(device) else: @@ -234,8 +298,9 @@ if __name__ == "__main__": optimizer = optim.Adam(agent.parameters(), lr=args.lr, eps=1e-5) # Tensorboard and WandB Recorder - game_name = "Aimbot-BigArea-6Enemy-EndBonus" - run_name = f"{game_name}_{args.seed}_{int(time.time())}" + game_name = "Aimbot_Target" + game_type = "OffPolicy" + run_name = f"{game_name}_{game_type}_{args.seed}_{int(time.time())}" if args.wandb_track: wandb.init( project=game_name, @@ -326,6 +391,7 @@ if __name__ == "__main__": if next_done[i] == True: # finished a round, send finished memories to training datasets # compute advantage and discounted reward + print(i,"over") adv, rt = GAE( agent, args, @@ -357,7 +423,7 @@ if __name__ == "__main__": rewards_bf[i] = [] dones_bf[i] = [] values_bf[i] = [] - print(f"train dataset:{obs.size()[0]}/{args.datasetSize}") + print(f"train dataset added:{obs.size()[0]}/{args.datasetSize}") if obs.size()[0] >= args.datasetSize: # start train NN @@ -365,10 +431,11 @@ if __name__ == "__main__": state, done = next_state, next_done else: # skip this step use last predict action - next_obs, reward, done = env.step(action_cpu) + next_obs, reward, next_done = env.step(action_cpu) # save memories for i in range(env.unity_agent_num): if next_done[i] == True: + print(i,"over???") # save last memories to buffers ob_bf[i].append(state[i]) act_bf[i].append(action_cpu[i]) @@ -410,7 +477,7 @@ if __name__ == "__main__": rewards_bf[i] = [] dones_bf[i] = [] values_bf[i] = [] - print(f"train dataset:{obs.size()[0]}/{args.datasetSize}") + print(f"train dataset added:{obs.size()[0]}/{args.datasetSize}") state, done = next_state, next_done i += 1 @@ -530,9 +597,12 @@ if __name__ == "__main__": "charts/SPS", int(global_step / (time.time() - start_time)), global_step ) writer.add_scalar("charts/Reward", rewardsMean, global_step) + writer.add_scalar("charts/GoWinRatio", WinRounds["Go"]/TotalRounds["Go"], global_step) + writer.add_scalar("charts/AttackWinRatio", WinRounds["Attack"]/TotalRounds["Attack"], global_step) + writer.add_scalar("charts/FreeWinRatio", WinRounds["Free"]/TotalRounds["Free"], global_step) if rewardsMean > bestReward: bestReward = rewardsMean - saveDir = 
"../PPO-Model/bigArea-384-128-hybrid-" + str(rewardsMean) + ".pt" + saveDir = "../PPO-Model/Target-700-500-256-hybrid-" + str(rewardsMean) + ".pt" torch.save(agent, saveDir) env.close() diff --git a/Aimbot-PPO-Python/Pytorch/testarea.ipynb b/Aimbot-PPO-Python/Pytorch/testarea.ipynb index 9fbc548..c89e5d1 100644 --- a/Aimbot-PPO-Python/Pytorch/testarea.ipynb +++ b/Aimbot-PPO-Python/Pytorch/testarea.ipynb @@ -525,28 +525,22 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "start 0\n", - "end 3\n", - "start 3\n", - "end 6\n", - "start 6\n", - "end 9\n", - "start 9\n", - "end 12\n" + "{'Go': 1, 'Attack': 0, 'Free': 0}\n" ] } ], "source": [ - "for i in range(0,10,3):\n", - " print(\"start\",i)\n", - " print('end',i+3)" + "Total = {\"Go\":0,\"Attack\":0,\"Free\":0}\n", + "\n", + "Total[\"Go\"] +=1\n", + "print(Total)" ] } ],