diff --git a/Aimbot-PPO-Python/Pytorch/AimbotEnv.py b/Aimbot-PPO-Python/Pytorch/AimbotEnv.py
index 7c8466d..0ca1a59 100644
--- a/Aimbot-PPO-Python/Pytorch/AimbotEnv.py
+++ b/Aimbot-PPO-Python/Pytorch/AimbotEnv.py
@@ -12,12 +12,13 @@ class Aimbot(gym.Env):
         envPath: str,
         workerID: int = 1,
         basePort: int = 100,
+        side_channels: list = []
     ):
         super(Aimbot, self).__init__()
         self.env = UnityEnvironment(
             file_name=envPath,
             seed=1,
-            side_channels=[],
+            side_channels=side_channels,
             worker_id=workerID,
             base_port=basePort,
         )
diff --git a/Aimbot-PPO-Python/Pytorch/ppo.py b/Aimbot-PPO-Python/Pytorch/ppo.py
index b2c6cbf..ebd9886 100644
--- a/Aimbot-PPO-Python/Pytorch/ppo.py
+++ b/Aimbot-PPO-Python/Pytorch/ppo.py
@@ -3,6 +3,7 @@ import wandb
 import time
 import numpy as np
 import random
+import uuid
 import torch
 import torch.nn as nn
 import torch.optim as optim
@@ -13,22 +14,31 @@ from torch.distributions.normal import Normal
 from torch.distributions.categorical import Categorical
 from distutils.util import strtobool
 from torch.utils.tensorboard import SummaryWriter
+from mlagents_envs.environment import UnityEnvironment
+from mlagents_envs.side_channel.side_channel import (
+    SideChannel,
+    IncomingMessage,
+    OutgoingMessage,
+)
+from typing import List
 
 bestReward = 0
 
 DEFAULT_SEED = 9331
-ENV_PATH = "../Build/Build-ParallelEnv-BigArea-6Enemy-EndBonus/Aimbot-ParallelEnv"
+ENV_PATH = "../Build/Build-ParallelEnv-Target-OffPolicy-SingleStack-SideChannel/Aimbot-ParallelEnv"
+SIDE_CHANNEL_UUID = uuid.UUID("8bbfb62a-99b4-457c-879d-b78b69066b5e")
 WAND_ENTITY = "koha9"
 WORKER_ID = 1
 BASE_PORT = 1000
 
-# max round steps per agent is 2500, 25 seconds
+# max round steps per agent is 2500 / DECISION_PERIOD, i.e. 25 seconds
+# !!! check every parameter before running !!!
 TOTAL_STEPS = 4000000
 BATCH_SIZE = 512
 MAX_TRAINNING_DATASETS = 8000
-DECISION_PERIOD = 2
-LEARNING_RATE = 7e-4
+DECISION_PERIOD = 1
+LEARNING_RATE = 1e-3
 GAMMA = 0.99
 GAE_LAMBDA = 0.95
 EPOCHS = 4
@@ -37,14 +47,19 @@ POLICY_COEF = 1.0
 ENTROPY_COEF = 0.01
 CRITIC_COEF = 0.5
 
-ANNEAL_LEARNING_RATE = False
+ANNEAL_LEARNING_RATE = True
 CLIP_VLOSS = True
 NORM_ADV = True
-TRAIN = False
+TRAIN = True
 
-WANDB_TACK = False
+WANDB_TACK = True
 LOAD_DIR = None
-LOAD_DIR = "../PPO-Model/bigArea-4.pt"
+LOAD_DIR = "../PPO-Model/Aimbot-target-last.pt"
+
+# public data
+TotalRounds = {"Go":0,"Attack":0,"Free":0}
+WinRounds = {"Go":0,"Attack":0,"Free":0}
+
 
 def parse_args():
     # fmt: off
@@ -127,9 +142,11 @@ class PPOAgent(nn.Module):
         self.continuous_size = env.unity_continuous_size
 
         self.network = nn.Sequential(
-            layer_init(nn.Linear(np.array(env.unity_observation_shape).prod(), 384)),
+            layer_init(nn.Linear(np.array(env.unity_observation_shape).prod(), 700)),
             nn.ReLU(),
-            layer_init(nn.Linear(384, 256)),
+            layer_init(nn.Linear(700, 500)),
+            nn.ReLU(),
+            layer_init(nn.Linear(500, 256)),
             nn.ReLU(),
         )
         self.actor_dis = layer_init(nn.Linear(256, self.discrete_size), std=0.01)
@@ -213,6 +230,52 @@ def GAE(agent, args, rewards, dones, values, next_obs, next_done):
     advantages = returns - values
     return advantages, returns
 
+class AimbotSideChannel(SideChannel):
+    def __init__(self, channel_id: uuid.UUID) -> None:
+        super().__init__(channel_id)
+    def on_message_received(self, msg: IncomingMessage) -> None:
+        """
+        Note: We must implement this method of the SideChannel interface to
+        receive messages from Unity
+        """
+        thisMessage = msg.read_string()
+        print(thisMessage)
+        thisResult = thisMessage.split("|")
+        if(thisResult[0] == "result"):
+            TotalRounds[thisResult[1]]+=1
+            if(thisResult[2] == "Win"):
"Win"): + WinRounds[thisResult[1]]+=1 + print(TotalRounds) + print(WinRounds) + elif(thisResult[0] == "Error"): + print(thisMessage) + # 发送函数 + def send_string(self, data: str) -> None: + """发送一个字符串给C#""" + msg = OutgoingMessage() + msg.write_string(data) + super().queue_message_to_send(msg) + + def send_bool(self, data: bool) -> None: + msg = OutgoingMessage() + msg.write_bool(data) + super().queue_message_to_send(msg) + + def send_int(self, data: int) -> None: + msg = OutgoingMessage() + msg.write_int32(data) + super().queue_message_to_send(msg) + + def send_float(self, data: float) -> None: + msg = OutgoingMessage() + msg.write_float32(data) + super().queue_message_to_send(msg) + + def send_float_list(self, data: List[float]) -> None: + msg = OutgoingMessage() + msg.write_float32_list(data) + super().queue_message_to_send(msg) + if __name__ == "__main__": args = parse_args() @@ -223,7 +286,8 @@ if __name__ == "__main__": device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu") # Initialize environment anget optimizer - env = Aimbot(envPath=args.path, workerID=args.workerID, basePort=args.baseport) + aimBotsideChannel = AimbotSideChannel(SIDE_CHANNEL_UUID); + env = Aimbot(envPath=args.path, workerID=args.workerID, basePort=args.baseport,side_channels=[aimBotsideChannel]) if args.load_dir is None: agent = PPOAgent(env).to(device) else: @@ -234,8 +298,9 @@ if __name__ == "__main__": optimizer = optim.Adam(agent.parameters(), lr=args.lr, eps=1e-5) # Tensorboard and WandB Recorder - game_name = "Aimbot-BigArea-6Enemy-EndBonus" - run_name = f"{game_name}_{args.seed}_{int(time.time())}" + game_name = "Aimbot_Target" + game_type = "OffPolicy" + run_name = f"{game_name}_{game_type}_{args.seed}_{int(time.time())}" if args.wandb_track: wandb.init( project=game_name, @@ -326,6 +391,7 @@ if __name__ == "__main__": if next_done[i] == True: # finished a round, send finished memories to training datasets # compute advantage and discounted reward + print(i,"over") adv, rt = GAE( agent, args, @@ -357,7 +423,7 @@ if __name__ == "__main__": rewards_bf[i] = [] dones_bf[i] = [] values_bf[i] = [] - print(f"train dataset:{obs.size()[0]}/{args.datasetSize}") + print(f"train dataset added:{obs.size()[0]}/{args.datasetSize}") if obs.size()[0] >= args.datasetSize: # start train NN @@ -365,10 +431,11 @@ if __name__ == "__main__": state, done = next_state, next_done else: # skip this step use last predict action - next_obs, reward, done = env.step(action_cpu) + next_obs, reward, next_done = env.step(action_cpu) # save memories for i in range(env.unity_agent_num): if next_done[i] == True: + print(i,"over???") # save last memories to buffers ob_bf[i].append(state[i]) act_bf[i].append(action_cpu[i]) @@ -410,7 +477,7 @@ if __name__ == "__main__": rewards_bf[i] = [] dones_bf[i] = [] values_bf[i] = [] - print(f"train dataset:{obs.size()[0]}/{args.datasetSize}") + print(f"train dataset added:{obs.size()[0]}/{args.datasetSize}") state, done = next_state, next_done i += 1 @@ -530,9 +597,12 @@ if __name__ == "__main__": "charts/SPS", int(global_step / (time.time() - start_time)), global_step ) writer.add_scalar("charts/Reward", rewardsMean, global_step) + writer.add_scalar("charts/GoWinRatio", WinRounds["Go"]/TotalRounds["Go"], global_step) + writer.add_scalar("charts/AttackWinRatio", WinRounds["Attack"]/TotalRounds["Attack"], global_step) + writer.add_scalar("charts/FreeWinRatio", WinRounds["Free"]/TotalRounds["Free"], global_step) if rewardsMean > bestReward: bestReward = rewardsMean - saveDir = 
"../PPO-Model/bigArea-384-128-hybrid-" + str(rewardsMean) + ".pt" + saveDir = "../PPO-Model/Target-700-500-256-hybrid-" + str(rewardsMean) + ".pt" torch.save(agent, saveDir) env.close() diff --git a/Aimbot-PPO-Python/Pytorch/testarea.ipynb b/Aimbot-PPO-Python/Pytorch/testarea.ipynb index 9fbc548..c89e5d1 100644 --- a/Aimbot-PPO-Python/Pytorch/testarea.ipynb +++ b/Aimbot-PPO-Python/Pytorch/testarea.ipynb @@ -525,28 +525,22 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "start 0\n", - "end 3\n", - "start 3\n", - "end 6\n", - "start 6\n", - "end 9\n", - "start 9\n", - "end 12\n" + "{'Go': 1, 'Attack': 0, 'Free': 0}\n" ] } ], "source": [ - "for i in range(0,10,3):\n", - " print(\"start\",i)\n", - " print('end',i+3)" + "Total = {\"Go\":0,\"Attack\":0,\"Free\":0}\n", + "\n", + "Total[\"Go\"] +=1\n", + "print(Total)" ] } ],