diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..26df38b --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.linting.enabled": false +} \ No newline at end of file diff --git a/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py b/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py index af97665..97b6ab4 100644 --- a/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py +++ b/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py @@ -1,5 +1,4 @@ import argparse -import wandb import time import numpy as np import random @@ -9,20 +8,14 @@ import torch.nn as nn import torch.optim as optim import atexit -from AimbotEnv import Aimbot -from tqdm import tqdm + +from aimbotEnv import Aimbot +from ppoagent import PPOAgent +from ppoagent import GAE +from ppoagent import AimbotSideChannel +from airecorder import WandbRecorder from enum import Enum -from torch.distributions.normal import Normal -from torch.distributions.categorical import Categorical from distutils.util import strtobool -from torch.utils.tensorboard import SummaryWriter -from mlagents_envs.environment import UnityEnvironment -from mlagents_envs.side_channel.side_channel import ( - SideChannel, - IncomingMessage, - OutgoingMessage, -) -from typing import List bestReward = -1 @@ -62,11 +55,11 @@ BROADCASTREWARD = False ANNEAL_LEARNING_RATE = True CLIP_VLOSS = True NORM_ADV = False -TRAIN = False +TRAIN = True SAVE_MODEL = False -WANDB_TACK = True +WANDB_TACK = False LOAD_DIR = None -LOAD_DIR = "../PPO-Model/PList_Go_LeakyReLU_9331_1677965178_bestGoto/PList_Go_LeakyReLU_9331_1677965178_10.709002.pt" +#LOAD_DIR = "../PPO-Model/PList_Go_LeakyReLU_9331_1677965178_bestGoto/PList_Go_LeakyReLU_9331_1677965178_10.709002.pt" # public data class Targets(Enum): @@ -86,8 +79,6 @@ BASE_LOSEREWARD = -999 TARGETNUM= 4 ENV_TIMELIMIT = 30 RESULT_BROADCAST_RATIO = 1/ENV_TIMELIMIT -TotalRounds = {"Free":0,"Go":0,"Attack":0} -WinRounds = {"Free":0,"Go":0,"Attack":0} # !!!SPECIAL PARAMETERS!!! 
# change it while program is finished @@ -168,215 +159,6 @@ def parse_args(): return args -def layer_init(layer, std=np.sqrt(2), bias_const=0.0): - torch.nn.init.orthogonal_(layer.weight, std) - torch.nn.init.constant_(layer.bias, bias_const) - return layer - - -class PPOAgent(nn.Module): - def __init__(self, env: Aimbot,targetNum:int): - super(PPOAgent, self).__init__() - self.targetNum = targetNum - self.stateSize = env.unity_observation_shape[0] - self.agentNum = env.unity_agent_num - self.targetSize = TARGET_STATE_SIZE - self.timeSize = TIME_STATE_SIZE - self.gunSize = GUN_STATE_SIZE - self.myStateSize = MY_STATE_SIZE - self.raySize = env.unity_observation_shape[0] - TOTAL_T_SIZE - self.nonRaySize = TOTAL_T_SIZE - self.head_input_size = env.unity_observation_shape[0] - self.targetSize-self.timeSize-self.gunSize# except target state input - - self.discrete_size = env.unity_discrete_size - self.discrete_shape = list(env.unity_discrete_branches) - self.continuous_size = env.unity_continuous_size - - self.viewNetwork = nn.Sequential( - layer_init(nn.Linear(self.raySize, 200)), - nn.LeakyReLU() - ) - self.targetNetworks = nn.ModuleList([nn.Sequential( - layer_init(nn.Linear(self.nonRaySize, 100)), - nn.LeakyReLU() - )for i in range(targetNum)]) - self.middleNetworks = nn.ModuleList([nn.Sequential( - layer_init(nn.Linear(300,200)), - nn.LeakyReLU() - )for i in range(targetNum)]) - self.actor_dis = nn.ModuleList([layer_init(nn.Linear(200, self.discrete_size), std=0.5) for i in range(targetNum)]) - self.actor_mean = nn.ModuleList([layer_init(nn.Linear(200, self.continuous_size), std=0.5) for i in range(targetNum)]) - # self.actor_logstd = nn.ModuleList([layer_init(nn.Linear(200, self.continuous_size), std=1) for i in range(targetNum)]) - # self.actor_logstd = nn.Parameter(torch.zeros(1, self.continuous_size)) - self.actor_logstd = nn.ParameterList([nn.Parameter(torch.zeros(1,self.continuous_size))for i in range(targetNum)]) # nn.Parameter(torch.zeros(1, self.continuous_size)) - self.critic = nn.ModuleList([layer_init(nn.Linear(200, 1), std=1)for i in range(targetNum)]) - - def get_value(self, state: torch.Tensor): - target = state[:,0].to(torch.int32) # int - thisStateNum = target.size()[0] - viewInput = state[:,-self.raySize:] # all ray input - targetInput = state[:,:self.nonRaySize] - viewLayer = self.viewNetwork(viewInput) - targetLayer = torch.stack([self.targetNetworks[target[i]](targetInput[i]) for i in range(thisStateNum)]) - middleInput = torch.cat([viewLayer,targetLayer],dim = 1) - middleLayer = torch.stack([self.middleNetworks[target[i]](middleInput[i]) for i in range(thisStateNum)]) - criticV = torch.stack([self.critic[target[i]](middleLayer[i]) for i in range(thisStateNum)]) # self.critic - return criticV - - def get_actions_value(self, state: torch.Tensor, actions=None): - target = state[:,0].to(torch.int32) # int - thisStateNum = target.size()[0] - viewInput = state[:,-self.raySize:] # all ray input - targetInput = state[:,:self.nonRaySize] - viewLayer = self.viewNetwork(viewInput) - targetLayer = torch.stack([self.targetNetworks[target[i]](targetInput[i]) for i in range(thisStateNum)]) - middleInput = torch.cat([viewLayer,targetLayer],dim = 1) - middleLayer = torch.stack([self.middleNetworks[target[i]](middleInput[i]) for i in range(thisStateNum)]) - - # discrete - # 递归targets的数量,既agent数来实现根据target不同来选用对应的输出网络计算输出 - dis_logits = torch.stack([self.actor_dis[target[i]](middleLayer[i]) for i in range(thisStateNum)]) - split_logits = torch.split(dis_logits, self.discrete_shape, 
dim=1) - multi_categoricals = [Categorical(logits=thisLogits) for thisLogits in split_logits] - # continuous - actions_mean = torch.stack([self.actor_mean[target[i]](middleLayer[i]) for i in range(thisStateNum)]) # self.actor_mean(hidden) - # action_logstd = torch.stack([self.actor_logstd[target[i]](middleLayer[i]) for i in range(thisStateNum)]) # self.actor_logstd(hidden) - # action_logstd = self.actor_logstd.expand_as(actions_mean) # self.actor_logstd.expand_as(actions_mean) - action_logstd = torch.stack([torch.squeeze(self.actor_logstd[target[i]],0) for i in range(thisStateNum)]) - # print(action_logstd) - action_std = torch.exp(action_logstd) # torch.exp(action_logstd) - con_probs = Normal(actions_mean, action_std) - # critic - criticV = torch.stack([self.critic[target[i]](middleLayer[i]) for i in range(thisStateNum)]) # self.critic - - if actions is None: - if args.train: - # select actions base on probability distribution model - disAct = torch.stack([ctgr.sample() for ctgr in multi_categoricals]) - conAct = con_probs.sample() - actions = torch.cat([disAct.T, conAct], dim=1) - else: - # select actions base on best probability distribution - # disAct = torch.stack([torch.argmax(logit, dim=1) for logit in split_logits]) - conAct = actions_mean - disAct = torch.stack([ctgr.sample() for ctgr in multi_categoricals]) - conAct = con_probs.sample() - actions = torch.cat([disAct.T, conAct], dim=1) - else: - disAct = actions[:, 0 : env.unity_discrete_type].T - conAct = actions[:, env.unity_discrete_type :] - dis_log_prob = torch.stack( - [ctgr.log_prob(act) for act, ctgr in zip(disAct, multi_categoricals)] - ) - dis_entropy = torch.stack([ctgr.entropy() for ctgr in multi_categoricals]) - return ( - actions, - dis_log_prob.sum(0), - dis_entropy.sum(0), - con_probs.log_prob(conAct).sum(1), - con_probs.entropy().sum(1), - criticV, - ) - - -def GAE(agent, args, rewards, dones, values, next_obs, next_done): - # GAE - with torch.no_grad(): - next_value = agent.get_value(next_obs).reshape(1, -1) - data_size = rewards.size()[0] - if args.gae: - advantages = torch.zeros_like(rewards).to(device) - lastgaelam = 0 - for t in reversed(range(data_size)): - if t == data_size - 1: - nextnonterminal = 1.0 - next_done - nextvalues = next_value - else: - nextnonterminal = 1.0 - dones[t + 1] - nextvalues = values[t + 1] - delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] - advantages[t] = lastgaelam = ( - delta + args.gamma * args.gaeLambda * nextnonterminal * lastgaelam - ) - returns = advantages + values - else: - returns = torch.zeros_like(rewards).to(device) - for t in reversed(range(data_size)): - if t == data_size - 1: - nextnonterminal = 1.0 - next_done - next_return = next_value - else: - nextnonterminal = 1.0 - dones[t + 1] - next_return = returns[t + 1] - returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return - advantages = returns - values - return advantages, returns - -class AimbotSideChannel(SideChannel): - def __init__(self, channel_id: uuid.UUID) -> None: - super().__init__(channel_id) - def on_message_received(self, msg: IncomingMessage) -> None: - global SCrecieved # make sure this variable is global - """ - Note: We must implement this method of the SideChannel interface to - receive messages from Unity - Message will be sent like this: - "Warning|Message1|Message2|Message3" or - "Error|Message1|Message2|Message3" - """ - thisMessage = msg.read_string() - thisResult = thisMessage.split("|") - if(thisResult[0] == "result"): - TotalRounds[thisResult[1]]+=1 - 
if(thisResult[2] == "Win"): - WinRounds[thisResult[1]]+=1 - #print(TotalRounds) - #print(WinRounds) - elif(thisResult[0] == "Error"): - print(thisMessage) - - # # while Message type is Warning - # if(thisResult[0] == "Warning"): - # # while Message1 is result means one game is over - # if (thisResult[1] == "Result"): - # TotalRounds[thisResult[2]]+=1 - # # while Message3 is Win means this agent win this game - # if(thisResult[3] == "Win"): - # WinRounds[thisResult[2]]+=1 - # # while Message1 is GameState means this game is just start - # # and tell python which game mode is - # elif (thisResult[1] == "GameState"): - # SCrecieved = 1 - # # while Message type is Error - # elif(thisResult[0] == "Error"): - # print(thisMessage) - # 发送函数 - def send_string(self, data: str) -> None: - # send a string toC# - msg = OutgoingMessage() - msg.write_string(data) - super().queue_message_to_send(msg) - - def send_bool(self, data: bool) -> None: - msg = OutgoingMessage() - msg.write_bool(data) - super().queue_message_to_send(msg) - - def send_int(self, data: int) -> None: - msg = OutgoingMessage() - msg.write_int32(data) - super().queue_message_to_send(msg) - - def send_float(self, data: float) -> None: - msg = OutgoingMessage() - msg.write_float32(data) - super().queue_message_to_send(msg) - - def send_float_list(self, data: List[float]) -> None: - msg = OutgoingMessage() - msg.write_float32_list(data) - super().queue_message_to_send(msg) - def broadCastEndReward(rewardBF:list,remainTime:float): thisRewardBF = rewardBF if (rewardBF[-1]<=-500): @@ -404,7 +186,16 @@ if __name__ == "__main__": aimBotsideChannel = AimbotSideChannel(SIDE_CHANNEL_UUID); env = Aimbot(envPath=args.path, workerID=args.workerID, basePort=args.baseport,side_channels=[aimBotsideChannel]) if args.load_dir is None: - agent = PPOAgent(env,TARGETNUM).to(device) + agent = PPOAgent( + env = env, + trainAgent=args.train, + targetNum=TARGETNUM, + target_state_size= TARGET_STATE_SIZE, + time_state_size=TIME_STATE_SIZE, + gun_state_size=GUN_STATE_SIZE, + my_state_size=MY_STATE_SIZE, + total_t_size=TOTAL_T_SIZE, + ).to(device) else: agent = torch.load(args.load_dir) # freeze @@ -420,23 +211,7 @@ if __name__ == "__main__": # Tensorboard and WandB Recorder run_name = f"{game_type}_{args.seed}_{int(time.time())}" - if args.wandb_track: - wandb.init( - project=game_name, - entity=args.wandb_entity, - sync_tensorboard=True, - config=vars(args), - name=run_name, - monitor_gym=True, - save_code=True, - ) - - writer = SummaryWriter(f"runs/{run_name}") - writer.add_text( - "hyperparameters", - "|param|value|\n|-|-|\n%s" - % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])), - ) + wdb_recorder = WandbRecorder(game_name, game_type, run_name, args) @atexit.register def save_model(): @@ -538,6 +313,7 @@ if __name__ == "__main__": torch.tensor(values_bf[i]).to(device), torch.tensor(next_state[i]).to(device).unsqueeze(0), torch.Tensor([next_done[i]]).to(device), + device, ) # send memories to training datasets obs[roundTargetType] = torch.cat((obs[roundTargetType], torch.tensor(ob_bf[i]).to(device)), 0) @@ -599,6 +375,7 @@ if __name__ == "__main__": torch.tensor(values_bf[i]).to(device), torch.Tensor(next_state[i]).to(device).unsqueeze(dim = 0), torch.Tensor([next_done[i]]).to(device), + device ) # send memories to training datasets obs[roundTargetType] = torch.cat((obs[roundTargetType], torch.tensor(ob_bf[i]).to(device)), 0) @@ -629,6 +406,7 @@ if __name__ == "__main__": i += 1 if args.train: + # train mode on meanRewardList = [] # for 
WANDB # loop all tarining queue for thisT in trainQueue: @@ -766,17 +544,24 @@ if __name__ == "__main__": returns[thisT] = torch.tensor([]).to(device) # record rewards for plotting purposes - writer.add_scalar(f"Target{targetName}/value_loss", v_loss.item(), target_steps[thisT]) - writer.add_scalar(f"Target{targetName}/dis_policy_loss", dis_pg_loss.item(), target_steps[thisT]) - writer.add_scalar(f"Target{targetName}/con_policy_loss", con_pg_loss.item(), target_steps[thisT]) - writer.add_scalar(f"Target{targetName}/total_loss", loss.item(), target_steps[thisT]) - writer.add_scalar(f"Target{targetName}/entropy_loss", entropy_loss.item(), target_steps[thisT]) - writer.add_scalar(f"Target{targetName}/Reward", targetRewardMean, target_steps[thisT]) - writer.add_scalar(f"Target{targetName}/WinRatio", WinRounds[targetName]/TotalRounds[targetName], target_steps[thisT]) + wdb_recorder.add_target_scalar( + targetName, + thisT, + v_loss, + dis_pg_loss, + con_pg_loss, + loss, + entropy_loss, + targetRewardMean, + target_steps, + ) print(f"episode over Target{targetName} mean reward:", targetRewardMean) TotalRewardMean = np.mean(meanRewardList) - writer.add_scalar("GlobalCharts/TotalRewardMean", TotalRewardMean, total_steps) - writer.add_scalar("GlobalCharts/learning_rate", optimizer.param_groups[0]["lr"], total_steps) + wdb_recorder.add_global_scalar( + TotalRewardMean, + optimizer.param_groups[0]["lr"], + total_steps, + ) # print cost time as seconds print("cost time:", time.time() - start_time) # New Record! @@ -785,6 +570,7 @@ if __name__ == "__main__": saveDir = "../PPO-Model/" + run_name +"_"+ str(TotalRewardMean) + ".pt" torch.save(agent, saveDir) else: + # train mode off meanRewardList = [] # for WANDB # while not in training mode, clear the buffer for thisT in trainQueue: @@ -804,14 +590,13 @@ if __name__ == "__main__": returns[thisT] = torch.tensor([]).to(device) # record rewards for plotting purposes - - writer.add_scalar(f"Target{targetName}/Reward", targetRewardMean, target_steps[thisT]) - writer.add_scalar(f"Target{targetName}/WinRatio", WinRounds[targetName]/TotalRounds[targetName], target_steps[thisT]) + wdb_recorder.writer.add_scalar(f"Target{targetName}/Reward", targetRewardMean, target_steps[thisT]) + wdb_recorder.add_win_ratio(targetName,target_steps[thisT]) print(f"episode over Target{targetName} mean reward:", targetRewardMean) TotalRewardMean = np.mean(meanRewardList) - writer.add_scalar("GlobalCharts/TotalRewardMean", TotalRewardMean, total_steps) + wdb_recorder.writer.add_scalar("GlobalCharts/TotalRewardMean", TotalRewardMean, total_steps) saveDir = "../PPO-Model/"+ run_name + "_last.pt" torch.save(agent, saveDir) env.close() - writer.close() + wdb_recorder.writer.close() diff --git a/Aimbot-PPO-Python/Pytorch/airecorder.py b/Aimbot-PPO-Python/Pytorch/airecorder.py new file mode 100644 index 0000000..3cea9df --- /dev/null +++ b/Aimbot-PPO-Python/Pytorch/airecorder.py @@ -0,0 +1,82 @@ +import wandb +import time +from torch.utils.tensorboard import SummaryWriter + + +total_rounds = {"Free": 0, "Go": 0, "Attack": 0} +win_rounds = {"Free": 0, "Go": 0, "Attack": 0} + + +# class for wandb recording +class WandbRecorder: + def __init__(self, game_name: str, game_type: str, run_name: str, _args) -> None: + # init wandb + self.game_name = game_name + self.game_type = game_type + self._args = _args + self.run_name = run_name + if self._args.wandb_track: + wandb.init( + project=self.game_name, + entity=self._args.wandb_entity, + sync_tensorboard=True, + config=vars(self._args), + 
name=self.run_name, + monitor_gym=True, + save_code=True, + ) + self.writer = SummaryWriter(f"runs/{self.run_name}") + self.writer.add_text( + "hyperparameters", + "|param|value|\n|-|-|\n%s" + % ("\n".join([f"|{key}|{value}|" for key, value in vars(self._args).items()])), + ) + + def add_target_scalar( + self, + target_name, + thisT, + v_loss, + dis_pg_loss, + con_pg_loss, + loss, + entropy_loss, + target_reward_mean, + target_steps, + ): + # fmt:off + self.writer.add_scalar( + f"Target{target_name}/value_loss", v_loss.item(), target_steps[thisT] + ) + self.writer.add_scalar( + f"Target{target_name}/dis_policy_loss", dis_pg_loss.item(), target_steps[thisT] + ) + self.writer.add_scalar( + f"Target{target_name}/con_policy_loss", con_pg_loss.item(), target_steps[thisT] + ) + self.writer.add_scalar( + f"Target{target_name}/total_loss", loss.item(), target_steps[thisT] + ) + self.writer.add_scalar( + f"Target{target_name}/entropy_loss", entropy_loss.item(), target_steps[thisT] + ) + self.writer.add_scalar( + f"Target{target_name}/Reward", target_reward_mean, target_steps[thisT] + ) + self.writer.add_scalar( + f"Target{target_name}/WinRatio", win_rounds[target_name] / total_rounds[target_name], target_steps[thisT], + ) + # fmt:on + + def add_global_scalar( + self, + total_reward_mean, + learning_rate, + total_steps, + ): + self.writer.add_scalar("GlobalCharts/TotalRewardMean", total_reward_mean, total_steps) + self.writer.add_scalar("GlobalCharts/learning_rate", learning_rate, total_steps) + def add_win_ratio(self, target_name, target_steps): + self.writer.add_scalar( + f"Target{target_name}/WinRatio", win_rounds[target_name] / total_rounds[target_name], target_steps, + ) diff --git a/Aimbot-PPO-Python/Pytorch/ppoagent.py b/Aimbot-PPO-Python/Pytorch/ppoagent.py new file mode 100644 index 0000000..d13bae0 --- /dev/null +++ b/Aimbot-PPO-Python/Pytorch/ppoagent.py @@ -0,0 +1,267 @@ +import numpy as np +import torch +import uuid +import airecorder +from torch import nn +from typing import List +from aimbotEnv import Aimbot +from torch.distributions.normal import Normal +from torch.distributions.categorical import Categorical +from mlagents_envs.side_channel.side_channel import ( + SideChannel, + IncomingMessage, + OutgoingMessage, +) + + +def layer_init(layer, std=np.sqrt(2), bias_const=0.0): + nn.init.orthogonal_(layer.weight, std) + nn.init.constant_(layer.bias, bias_const) + return layer + + +class PPOAgent(nn.Module): + def __init__( + self, + env: Aimbot, + trainAgent: bool, + targetNum: int, + target_state_size: int, + time_state_size: int, + gun_state_size: int, + my_state_size: int, + total_t_size: int, + ): + super(PPOAgent, self).__init__() + self.trainAgent = trainAgent + self.targetNum = targetNum + self.stateSize = env.unity_observation_shape[0] + self.agentNum = env.unity_agent_num + self.targetSize = target_state_size + self.timeSize = time_state_size + self.gunSize = gun_state_size + self.myStateSize = my_state_size + self.raySize = env.unity_observation_shape[0] - total_t_size + self.nonRaySize = total_t_size + self.head_input_size = ( + env.unity_observation_shape[0] - self.targetSize - self.timeSize - self.gunSize + ) # except target state input + + self.unityDiscreteType = env.unity_discrete_type + self.discrete_size = env.unity_discrete_size + self.discrete_shape = list(env.unity_discrete_branches) + self.continuous_size = env.unity_continuous_size + + self.viewNetwork = nn.Sequential(layer_init(nn.Linear(self.raySize, 200)), nn.LeakyReLU()) + self.targetNetworks = 
nn.ModuleList( + [ + nn.Sequential(layer_init(nn.Linear(self.nonRaySize, 100)), nn.LeakyReLU()) + for i in range(targetNum) + ] + ) + self.middleNetworks = nn.ModuleList( + [ + nn.Sequential(layer_init(nn.Linear(300, 200)), nn.LeakyReLU()) + for i in range(targetNum) + ] + ) + self.actor_dis = nn.ModuleList( + [layer_init(nn.Linear(200, self.discrete_size), std=0.5) for i in range(targetNum)] + ) + self.actor_mean = nn.ModuleList( + [layer_init(nn.Linear(200, self.continuous_size), std=0.5) for i in range(targetNum)] + ) + # self.actor_logstd = nn.ModuleList([layer_init(nn.Linear(200, self.continuous_size), std=1) for i in range(targetNum)]) + # self.actor_logstd = nn.Parameter(torch.zeros(1, self.continuous_size)) + self.actor_logstd = nn.ParameterList( + [nn.Parameter(torch.zeros(1, self.continuous_size)) for i in range(targetNum)] + ) # nn.Parameter(torch.zeros(1, self.continuous_size)) + self.critic = nn.ModuleList( + [layer_init(nn.Linear(200, 1), std=1) for i in range(targetNum)] + ) + + def get_value(self, state: torch.Tensor): + target = state[:, 0].to(torch.int32) # int + thisStateNum = target.size()[0] + viewInput = state[:, -self.raySize :] # all ray input + targetInput = state[:, : self.nonRaySize] + viewLayer = self.viewNetwork(viewInput) + targetLayer = torch.stack( + [self.targetNetworks[target[i]](targetInput[i]) for i in range(thisStateNum)] + ) + middleInput = torch.cat([viewLayer, targetLayer], dim=1) + middleLayer = torch.stack( + [self.middleNetworks[target[i]](middleInput[i]) for i in range(thisStateNum)] + ) + criticV = torch.stack( + [self.critic[target[i]](middleLayer[i]) for i in range(thisStateNum)] + ) # self.critic + return criticV + + def get_actions_value(self, state: torch.Tensor, actions=None): + target = state[:, 0].to(torch.int32) # int + thisStateNum = target.size()[0] + viewInput = state[:, -self.raySize :] # all ray input + targetInput = state[:, : self.nonRaySize] + viewLayer = self.viewNetwork(viewInput) + targetLayer = torch.stack( + [self.targetNetworks[target[i]](targetInput[i]) for i in range(thisStateNum)] + ) + middleInput = torch.cat([viewLayer, targetLayer], dim=1) + middleLayer = torch.stack( + [self.middleNetworks[target[i]](middleInput[i]) for i in range(thisStateNum)] + ) + + # discrete + # loop over the targets (i.e. the number of agents) so that each agent's output is computed by the output network matching its target + dis_logits = torch.stack( + [self.actor_dis[target[i]](middleLayer[i]) for i in range(thisStateNum)] + ) + split_logits = torch.split(dis_logits, self.discrete_shape, dim=1) + multi_categoricals = [Categorical(logits=thisLogits) for thisLogits in split_logits] + # continuous + actions_mean = torch.stack( + [self.actor_mean[target[i]](middleLayer[i]) for i in range(thisStateNum)] + ) # self.actor_mean(hidden) + # action_logstd = torch.stack([self.actor_logstd[target[i]](middleLayer[i]) for i in range(thisStateNum)]) # self.actor_logstd(hidden) + # action_logstd = self.actor_logstd.expand_as(actions_mean) # self.actor_logstd.expand_as(actions_mean) + action_logstd = torch.stack( + [torch.squeeze(self.actor_logstd[target[i]], 0) for i in range(thisStateNum)] + ) + # print(action_logstd) + action_std = torch.exp(action_logstd) # torch.exp(action_logstd) + con_probs = Normal(actions_mean, action_std) + # critic + criticV = torch.stack( + [self.critic[target[i]](middleLayer[i]) for i in range(thisStateNum)] + ) # self.critic + + if actions is None: + if self.trainAgent: + # select actions based on probability distribution model + disAct = torch.stack([ctgr.sample() for ctgr in multi_categoricals]) + conAct = 
con_probs.sample() + actions = torch.cat([disAct.T, conAct], dim=1) + else: + # select actions based on best probability distribution + # disAct = torch.stack([torch.argmax(logit, dim=1) for logit in split_logits]) + conAct = actions_mean + disAct = torch.stack([ctgr.sample() for ctgr in multi_categoricals]) + conAct = con_probs.sample() + actions = torch.cat([disAct.T, conAct], dim=1) + else: + disAct = actions[:, 0 : self.unityDiscreteType].T + conAct = actions[:, self.unityDiscreteType :] + dis_log_prob = torch.stack( + [ctgr.log_prob(act) for act, ctgr in zip(disAct, multi_categoricals)] + ) + dis_entropy = torch.stack([ctgr.entropy() for ctgr in multi_categoricals]) + return ( + actions, + dis_log_prob.sum(0), + dis_entropy.sum(0), + con_probs.log_prob(conAct).sum(1), + con_probs.entropy().sum(1), + criticV, + ) + + +def GAE(agent, args, rewards, dones, values, next_obs, next_done, device): + # GAE + with torch.no_grad(): + next_value = agent.get_value(next_obs).reshape(1, -1) + data_size = rewards.size()[0] + if args.gae: + advantages = torch.zeros_like(rewards).to(device) + lastgaelam = 0 + for t in reversed(range(data_size)): + if t == data_size - 1: + nextnonterminal = 1.0 - next_done + nextvalues = next_value + else: + nextnonterminal = 1.0 - dones[t + 1] + nextvalues = values[t + 1] + delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t] + advantages[t] = lastgaelam = ( + delta + args.gamma * args.gaeLambda * nextnonterminal * lastgaelam + ) + returns = advantages + values + else: + returns = torch.zeros_like(rewards).to(device) + for t in reversed(range(data_size)): + if t == data_size - 1: + nextnonterminal = 1.0 - next_done + next_return = next_value + else: + nextnonterminal = 1.0 - dones[t + 1] + next_return = returns[t + 1] + returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return + advantages = returns - values + return advantages, returns + + +class AimbotSideChannel(SideChannel): + def __init__(self, channel_id: uuid.UUID) -> None: + super().__init__(channel_id) + + def on_message_received(self, msg: IncomingMessage) -> None: + global SCrecieved # make sure this variable is global + """ + Note: We must implement this method of the SideChannel interface to + receive messages from Unity + Message will be sent like this: + "Warning|Message1|Message2|Message3" or + "Error|Message1|Message2|Message3" + """ + thisMessage = msg.read_string() + thisResult = thisMessage.split("|") + if(thisResult[0] == "result"): + airecorder.total_rounds[thisResult[1]]+=1 + if(thisResult[2] == "Win"): + airecorder.win_rounds[thisResult[1]]+=1 + #print(TotalRounds) + #print(WinRounds) + elif(thisResult[0] == "Error"): + print(thisMessage) + + # # while Message type is Warning + # if(thisResult[0] == "Warning"): + # # while Message1 is result means one game is over + # if (thisResult[1] == "Result"): + # TotalRounds[thisResult[2]]+=1 + # # while Message3 is Win means this agent win this game + # if(thisResult[3] == "Win"): + # WinRounds[thisResult[2]]+=1 + # # while Message1 is GameState means this game is just start + # # and tell python which game mode is + # elif (thisResult[1] == "GameState"): + # SCrecieved = 1 + # # while Message type is Error + # elif(thisResult[0] == "Error"): + # print(thisMessage) + # send functions + def send_string(self, data: str) -> None: + # send a string to C# + msg = OutgoingMessage() + msg.write_string(data) + super().queue_message_to_send(msg) + + def send_bool(self, data: bool) -> None: + msg = OutgoingMessage() + msg.write_bool(data) + 
super().queue_message_to_send(msg) + + def send_int(self, data: int) -> None: + msg = OutgoingMessage() + msg.write_int32(data) + super().queue_message_to_send(msg) + + def send_float(self, data: float) -> None: + msg = OutgoingMessage() + msg.write_float32(data) + super().queue_message_to_send(msg) + + def send_float_list(self, data: List[float]) -> None: + msg = OutgoingMessage() + msg.write_float32_list(data) + super().queue_message_to_send(msg) \ No newline at end of file diff --git a/Aimbot-PPO-Python/Pytorch/test2.ipynb b/Aimbot-PPO-Python/Pytorch/test2.ipynb index dc895ad..7cd4d47 100644 --- a/Aimbot-PPO-Python/Pytorch/test2.ipynb +++ b/Aimbot-PPO-Python/Pytorch/test2.ipynb @@ -107,6 +107,40 @@ ")\n", "from typing import List\n" ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'aaa' object has no attribute 'outa'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[5], line 14\u001b[0m\n\u001b[0;32m 12\u001b[0m asd \u001b[39m=\u001b[39m aaa(outa, outb)\n\u001b[0;32m 13\u001b[0m asd\u001b[39m.\u001b[39mfunc()\n\u001b[1;32m---> 14\u001b[0m \u001b[39mprint\u001b[39m(asd\u001b[39m.\u001b[39;49mouta) \u001b[39m# 输出 100\u001b[39;00m\n", + "\u001b[1;31mAttributeError\u001b[0m: 'aaa' object has no attribute 'outa'" + ] + } + ], + "source": [ + "class aaa():\n", + " def __init__(self, a, b):\n", + " self.a = a\n", + " self.b = b\n", + "\n", + " def func(self):\n", + " global outa\n", + " outa = 100\n", + "\n", + "outa = 1\n", + "outb = 2\n", + "asd = aaa(outa, outb)\n", + "asd.func()\n", + "print(asd.outa) # 输出 100" + ] } ], "metadata": { @@ -125,7 +159,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.9.17" }, "orig_nbformat": 4 }, diff --git a/Aimbot-PPO-Python/Pytorch/testarea.ipynb b/Aimbot-PPO-Python/Pytorch/testarea.ipynb index 5b95a12..20d780a 100644 --- a/Aimbot-PPO-Python/Pytorch/testarea.ipynb +++ b/Aimbot-PPO-Python/Pytorch/testarea.ipynb @@ -62,7 +62,6 @@ "outputs": [], "source": [ "from mlagents_envs.environment import UnityEnvironment\n", - "from gym_unity.envs import UnityToGymWrapper\n", "import numpy as np\n", "\n", "ENV_PATH = \"../Build-ParallelEnv/Aimbot-ParallelEnv\"\n", @@ -368,6 +367,7 @@ ], "source": [ "import torch\n", + "from torch import nn\n", "\n", "def layer_init(layer, std=np.sqrt(2), bias_const=0.0):\n", " torch.nn.init.orthogonal_(layer.weight, std)\n", @@ -1248,6 +1248,24 @@ "saveDir = \"C:/Users/UCUNI/OneDrive/Unity/ML-Agents/Aimbot-PPO/Aimbot-PPO-Python/PPO-Model/Chimera-1677965178-1678547500.pt\"\n", "torch.save(badGotoAgent,saveDir)" ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n" + ] + } + ], + "source": [ + "import torch\n", + "print(torch.cuda.is_available())" + ] } ], "metadata": { @@ -1266,7 +1284,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.9.17" }, "orig_nbformat": 4, "vscode": {