Code cleanup; not compatible with models trained before this change
This commit is contained in: parent bee609d160, commit 177974888a
3  .vscode/settings.json  vendored  Normal file
@ -0,0 +1,3 @@
{
    "python.linting.enabled": false
}
@ -1,5 +1,4 @@
import argparse
import wandb
import time
import numpy as np
import random
@ -9,20 +8,14 @@ import torch.nn as nn
import torch.optim as optim
import atexit

from AimbotEnv import Aimbot
from tqdm import tqdm

from aimbotEnv import Aimbot
from ppoagent import PPOAgent
from ppoagent import GAE
from ppoagent import AimbotSideChannel
from airecorder import WandbRecorder
from enum import Enum
from torch.distributions.normal import Normal
from torch.distributions.categorical import Categorical
from distutils.util import strtobool
from torch.utils.tensorboard import SummaryWriter
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.side_channel import (
    SideChannel,
    IncomingMessage,
    OutgoingMessage,
)
from typing import List

bestReward = -1

@ -62,11 +55,11 @@ BROADCASTREWARD = False
ANNEAL_LEARNING_RATE = True
CLIP_VLOSS = True
NORM_ADV = False
TRAIN = False
TRAIN = True
SAVE_MODEL = False
WANDB_TACK = True
WANDB_TACK = False
LOAD_DIR = None
LOAD_DIR = "../PPO-Model/PList_Go_LeakyReLU_9331_1677965178_bestGoto/PList_Go_LeakyReLU_9331_1677965178_10.709002.pt"
#LOAD_DIR = "../PPO-Model/PList_Go_LeakyReLU_9331_1677965178_bestGoto/PList_Go_LeakyReLU_9331_1677965178_10.709002.pt"

# public data
class Targets(Enum):
@ -86,8 +79,6 @@ BASE_LOSEREWARD = -999
TARGETNUM= 4
ENV_TIMELIMIT = 30
RESULT_BROADCAST_RATIO = 1/ENV_TIMELIMIT
TotalRounds = {"Free":0,"Go":0,"Attack":0}
WinRounds = {"Free":0,"Go":0,"Attack":0}

# !!!SPECIAL PARAMETERS!!!
# change it while program is finished
@ -168,215 +159,6 @@ def parse_args():
|
||||
return args
|
||||
|
||||
|
||||
def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
|
||||
torch.nn.init.orthogonal_(layer.weight, std)
|
||||
torch.nn.init.constant_(layer.bias, bias_const)
|
||||
return layer
|
||||
|
||||
|
||||
class PPOAgent(nn.Module):
|
||||
def __init__(self, env: Aimbot,targetNum:int):
|
||||
super(PPOAgent, self).__init__()
|
||||
self.targetNum = targetNum
|
||||
self.stateSize = env.unity_observation_shape[0]
|
||||
self.agentNum = env.unity_agent_num
|
||||
self.targetSize = TARGET_STATE_SIZE
|
||||
self.timeSize = TIME_STATE_SIZE
|
||||
self.gunSize = GUN_STATE_SIZE
|
||||
self.myStateSize = MY_STATE_SIZE
|
||||
self.raySize = env.unity_observation_shape[0] - TOTAL_T_SIZE
|
||||
self.nonRaySize = TOTAL_T_SIZE
|
||||
self.head_input_size = env.unity_observation_shape[0] - self.targetSize-self.timeSize-self.gunSize# except target state input
|
||||
|
||||
self.discrete_size = env.unity_discrete_size
|
||||
self.discrete_shape = list(env.unity_discrete_branches)
|
||||
self.continuous_size = env.unity_continuous_size
|
||||
|
||||
self.viewNetwork = nn.Sequential(
|
||||
layer_init(nn.Linear(self.raySize, 200)),
|
||||
nn.LeakyReLU()
|
||||
)
|
||||
self.targetNetworks = nn.ModuleList([nn.Sequential(
|
||||
layer_init(nn.Linear(self.nonRaySize, 100)),
|
||||
nn.LeakyReLU()
|
||||
)for i in range(targetNum)])
|
||||
self.middleNetworks = nn.ModuleList([nn.Sequential(
|
||||
layer_init(nn.Linear(300,200)),
|
||||
nn.LeakyReLU()
|
||||
)for i in range(targetNum)])
|
||||
self.actor_dis = nn.ModuleList([layer_init(nn.Linear(200, self.discrete_size), std=0.5) for i in range(targetNum)])
|
||||
self.actor_mean = nn.ModuleList([layer_init(nn.Linear(200, self.continuous_size), std=0.5) for i in range(targetNum)])
|
||||
# self.actor_logstd = nn.ModuleList([layer_init(nn.Linear(200, self.continuous_size), std=1) for i in range(targetNum)])
|
||||
# self.actor_logstd = nn.Parameter(torch.zeros(1, self.continuous_size))
|
||||
self.actor_logstd = nn.ParameterList([nn.Parameter(torch.zeros(1,self.continuous_size))for i in range(targetNum)]) # nn.Parameter(torch.zeros(1, self.continuous_size))
|
||||
self.critic = nn.ModuleList([layer_init(nn.Linear(200, 1), std=1)for i in range(targetNum)])
|
||||
|
||||
def get_value(self, state: torch.Tensor):
|
||||
target = state[:,0].to(torch.int32) # int
|
||||
thisStateNum = target.size()[0]
|
||||
viewInput = state[:,-self.raySize:] # all ray input
|
||||
targetInput = state[:,:self.nonRaySize]
|
||||
viewLayer = self.viewNetwork(viewInput)
|
||||
targetLayer = torch.stack([self.targetNetworks[target[i]](targetInput[i]) for i in range(thisStateNum)])
|
||||
middleInput = torch.cat([viewLayer,targetLayer],dim = 1)
|
||||
middleLayer = torch.stack([self.middleNetworks[target[i]](middleInput[i]) for i in range(thisStateNum)])
|
||||
criticV = torch.stack([self.critic[target[i]](middleLayer[i]) for i in range(thisStateNum)]) # self.critic
|
||||
return criticV
|
||||
|
||||
def get_actions_value(self, state: torch.Tensor, actions=None):
|
||||
target = state[:,0].to(torch.int32) # int
|
||||
thisStateNum = target.size()[0]
|
||||
viewInput = state[:,-self.raySize:] # all ray input
|
||||
targetInput = state[:,:self.nonRaySize]
|
||||
viewLayer = self.viewNetwork(viewInput)
|
||||
targetLayer = torch.stack([self.targetNetworks[target[i]](targetInput[i]) for i in range(thisStateNum)])
|
||||
middleInput = torch.cat([viewLayer,targetLayer],dim = 1)
|
||||
middleLayer = torch.stack([self.middleNetworks[target[i]](middleInput[i]) for i in range(thisStateNum)])
|
||||
|
||||
# discrete
|
||||
# iterate over the targets (i.e. over the agents) so each sample is routed through the output network that matches its target
|
||||
dis_logits = torch.stack([self.actor_dis[target[i]](middleLayer[i]) for i in range(thisStateNum)])
|
||||
split_logits = torch.split(dis_logits, self.discrete_shape, dim=1)
|
||||
multi_categoricals = [Categorical(logits=thisLogits) for thisLogits in split_logits]
|
||||
# continuous
|
||||
actions_mean = torch.stack([self.actor_mean[target[i]](middleLayer[i]) for i in range(thisStateNum)]) # self.actor_mean(hidden)
|
||||
# action_logstd = torch.stack([self.actor_logstd[target[i]](middleLayer[i]) for i in range(thisStateNum)]) # self.actor_logstd(hidden)
|
||||
# action_logstd = self.actor_logstd.expand_as(actions_mean) # self.actor_logstd.expand_as(actions_mean)
|
||||
action_logstd = torch.stack([torch.squeeze(self.actor_logstd[target[i]],0) for i in range(thisStateNum)])
|
||||
# print(action_logstd)
|
||||
action_std = torch.exp(action_logstd) # torch.exp(action_logstd)
|
||||
con_probs = Normal(actions_mean, action_std)
|
||||
# critic
|
||||
criticV = torch.stack([self.critic[target[i]](middleLayer[i]) for i in range(thisStateNum)]) # self.critic
|
||||
|
||||
if actions is None:
|
||||
if args.train:
|
||||
# select actions based on the probability distributions
|
||||
disAct = torch.stack([ctgr.sample() for ctgr in multi_categoricals])
|
||||
conAct = con_probs.sample()
|
||||
actions = torch.cat([disAct.T, conAct], dim=1)
|
||||
else:
|
||||
# select actions based on the most probable outputs of the distributions
|
||||
# disAct = torch.stack([torch.argmax(logit, dim=1) for logit in split_logits])
|
||||
conAct = actions_mean
|
||||
disAct = torch.stack([ctgr.sample() for ctgr in multi_categoricals])
|
||||
conAct = con_probs.sample()
|
||||
actions = torch.cat([disAct.T, conAct], dim=1)
|
||||
else:
|
||||
disAct = actions[:, 0 : env.unity_discrete_type].T
|
||||
conAct = actions[:, env.unity_discrete_type :]
|
||||
dis_log_prob = torch.stack(
|
||||
[ctgr.log_prob(act) for act, ctgr in zip(disAct, multi_categoricals)]
|
||||
)
|
||||
dis_entropy = torch.stack([ctgr.entropy() for ctgr in multi_categoricals])
|
||||
return (
|
||||
actions,
|
||||
dis_log_prob.sum(0),
|
||||
dis_entropy.sum(0),
|
||||
con_probs.log_prob(conAct).sum(1),
|
||||
con_probs.entropy().sum(1),
|
||||
criticV,
|
||||
)
|
||||
|
||||
|
||||
def GAE(agent, args, rewards, dones, values, next_obs, next_done):
|
||||
# GAE
|
||||
with torch.no_grad():
|
||||
next_value = agent.get_value(next_obs).reshape(1, -1)
|
||||
data_size = rewards.size()[0]
|
||||
if args.gae:
|
||||
advantages = torch.zeros_like(rewards).to(device)
|
||||
lastgaelam = 0
|
||||
for t in reversed(range(data_size)):
|
||||
if t == data_size - 1:
|
||||
nextnonterminal = 1.0 - next_done
|
||||
nextvalues = next_value
|
||||
else:
|
||||
nextnonterminal = 1.0 - dones[t + 1]
|
||||
nextvalues = values[t + 1]
|
||||
delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t]
|
||||
advantages[t] = lastgaelam = (
|
||||
delta + args.gamma * args.gaeLambda * nextnonterminal * lastgaelam
|
||||
)
|
||||
returns = advantages + values
|
||||
else:
|
||||
returns = torch.zeros_like(rewards).to(device)
|
||||
for t in reversed(range(data_size)):
|
||||
if t == data_size - 1:
|
||||
nextnonterminal = 1.0 - next_done
|
||||
next_return = next_value
|
||||
else:
|
||||
nextnonterminal = 1.0 - dones[t + 1]
|
||||
next_return = returns[t + 1]
|
||||
returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return
|
||||
advantages = returns - values
|
||||
return advantages, returns
|
||||
|
||||
class AimbotSideChannel(SideChannel):
|
||||
def __init__(self, channel_id: uuid.UUID) -> None:
|
||||
super().__init__(channel_id)
|
||||
def on_message_received(self, msg: IncomingMessage) -> None:
|
||||
global SCrecieved # make sure this variable is global
|
||||
"""
|
||||
Note: We must implement this method of the SideChannel interface to
|
||||
receive messages from Unity
|
||||
Message will be sent like this:
|
||||
"Warning|Message1|Message2|Message3" or
|
||||
"Error|Message1|Message2|Message3"
|
||||
"""
|
||||
thisMessage = msg.read_string()
|
||||
thisResult = thisMessage.split("|")
|
||||
if(thisResult[0] == "result"):
|
||||
TotalRounds[thisResult[1]]+=1
|
||||
if(thisResult[2] == "Win"):
|
||||
WinRounds[thisResult[1]]+=1
|
||||
#print(TotalRounds)
|
||||
#print(WinRounds)
|
||||
elif(thisResult[0] == "Error"):
|
||||
print(thisMessage)
|
||||
|
||||
# # while Message type is Warning
|
||||
# if(thisResult[0] == "Warning"):
|
||||
# # while Message1 is result means one game is over
|
||||
# if (thisResult[1] == "Result"):
|
||||
# TotalRounds[thisResult[2]]+=1
|
||||
# # while Message3 is Win means this agent win this game
|
||||
# if(thisResult[3] == "Win"):
|
||||
# WinRounds[thisResult[2]]+=1
|
||||
# # while Message1 is GameState means this game is just start
|
||||
# # and tell python which game mode is
|
||||
# elif (thisResult[1] == "GameState"):
|
||||
# SCrecieved = 1
|
||||
# # while Message type is Error
|
||||
# elif(thisResult[0] == "Error"):
|
||||
# print(thisMessage)
|
||||
# message-sending helpers
|
||||
def send_string(self, data: str) -> None:
|
||||
# send a string to C#
|
||||
msg = OutgoingMessage()
|
||||
msg.write_string(data)
|
||||
super().queue_message_to_send(msg)
|
||||
|
||||
def send_bool(self, data: bool) -> None:
|
||||
msg = OutgoingMessage()
|
||||
msg.write_bool(data)
|
||||
super().queue_message_to_send(msg)
|
||||
|
||||
def send_int(self, data: int) -> None:
|
||||
msg = OutgoingMessage()
|
||||
msg.write_int32(data)
|
||||
super().queue_message_to_send(msg)
|
||||
|
||||
def send_float(self, data: float) -> None:
|
||||
msg = OutgoingMessage()
|
||||
msg.write_float32(data)
|
||||
super().queue_message_to_send(msg)
|
||||
|
||||
def send_float_list(self, data: List[float]) -> None:
|
||||
msg = OutgoingMessage()
|
||||
msg.write_float32_list(data)
|
||||
super().queue_message_to_send(msg)
|
||||
|
||||
def broadCastEndReward(rewardBF:list,remainTime:float):
|
||||
thisRewardBF = rewardBF
|
||||
if (rewardBF[-1]<=-500):
|
||||
@ -404,7 +186,16 @@ if __name__ == "__main__":
    aimBotsideChannel = AimbotSideChannel(SIDE_CHANNEL_UUID);
    env = Aimbot(envPath=args.path, workerID=args.workerID, basePort=args.baseport,side_channels=[aimBotsideChannel])
    if args.load_dir is None:
        agent = PPOAgent(env,TARGETNUM).to(device)
        agent = PPOAgent(
            env = env,
            trainAgent=args.train,
            targetNum=TARGETNUM,
            target_state_size= TARGET_STATE_SIZE,
            time_state_size=TIME_STATE_SIZE,
            gun_state_size=GUN_STATE_SIZE,
            my_state_size=MY_STATE_SIZE,
            total_t_size=TOTAL_T_SIZE,
        ).to(device)
    else:
        agent = torch.load(args.load_dir)
    # freeze
@ -420,23 +211,7 @@ if __name__ == "__main__":
|
||||
|
||||
# Tensorboard and WandB Recorder
|
||||
run_name = f"{game_type}_{args.seed}_{int(time.time())}"
|
||||
if args.wandb_track:
|
||||
wandb.init(
|
||||
project=game_name,
|
||||
entity=args.wandb_entity,
|
||||
sync_tensorboard=True,
|
||||
config=vars(args),
|
||||
name=run_name,
|
||||
monitor_gym=True,
|
||||
save_code=True,
|
||||
)
|
||||
|
||||
writer = SummaryWriter(f"runs/{run_name}")
|
||||
writer.add_text(
|
||||
"hyperparameters",
|
||||
"|param|value|\n|-|-|\n%s"
|
||||
% ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])),
|
||||
)
|
||||
wdb_recorder = WandbRecorder(game_name, game_type, run_name, args)
|
||||
|
||||
@atexit.register
|
||||
def save_model():
|
||||
@ -538,6 +313,7 @@ if __name__ == "__main__":
|
||||
torch.tensor(values_bf[i]).to(device),
|
||||
torch.tensor(next_state[i]).to(device).unsqueeze(0),
|
||||
torch.Tensor([next_done[i]]).to(device),
|
||||
device,
|
||||
)
|
||||
# send memories to training datasets
|
||||
obs[roundTargetType] = torch.cat((obs[roundTargetType], torch.tensor(ob_bf[i]).to(device)), 0)
|
||||
@ -599,6 +375,7 @@ if __name__ == "__main__":
|
||||
torch.tensor(values_bf[i]).to(device),
|
||||
torch.Tensor(next_state[i]).to(device).unsqueeze(dim = 0),
|
||||
torch.Tensor([next_done[i]]).to(device),
|
||||
device
|
||||
)
|
||||
# send memories to training datasets
|
||||
obs[roundTargetType] = torch.cat((obs[roundTargetType], torch.tensor(ob_bf[i]).to(device)), 0)
|
||||
@ -629,6 +406,7 @@ if __name__ == "__main__":
|
||||
i += 1
|
||||
|
||||
if args.train:
|
||||
# train mode on
|
||||
meanRewardList = [] # for WANDB
|
||||
# loop over all training queues
|
||||
for thisT in trainQueue:
|
||||
@ -766,17 +544,24 @@ if __name__ == "__main__":
|
||||
returns[thisT] = torch.tensor([]).to(device)
|
||||
|
||||
# record rewards for plotting purposes
|
||||
writer.add_scalar(f"Target{targetName}/value_loss", v_loss.item(), target_steps[thisT])
|
||||
writer.add_scalar(f"Target{targetName}/dis_policy_loss", dis_pg_loss.item(), target_steps[thisT])
|
||||
writer.add_scalar(f"Target{targetName}/con_policy_loss", con_pg_loss.item(), target_steps[thisT])
|
||||
writer.add_scalar(f"Target{targetName}/total_loss", loss.item(), target_steps[thisT])
|
||||
writer.add_scalar(f"Target{targetName}/entropy_loss", entropy_loss.item(), target_steps[thisT])
|
||||
writer.add_scalar(f"Target{targetName}/Reward", targetRewardMean, target_steps[thisT])
|
||||
writer.add_scalar(f"Target{targetName}/WinRatio", WinRounds[targetName]/TotalRounds[targetName], target_steps[thisT])
|
||||
wdb_recorder.add_target_scalar(
|
||||
targetName,
|
||||
thisT,
|
||||
v_loss,
|
||||
dis_pg_loss,
|
||||
con_pg_loss,
|
||||
loss,
|
||||
entropy_loss,
|
||||
targetRewardMean,
|
||||
target_steps,
|
||||
)
|
||||
print(f"episode over Target{targetName} mean reward:", targetRewardMean)
|
||||
TotalRewardMean = np.mean(meanRewardList)
|
||||
writer.add_scalar("GlobalCharts/TotalRewardMean", TotalRewardMean, total_steps)
|
||||
writer.add_scalar("GlobalCharts/learning_rate", optimizer.param_groups[0]["lr"], total_steps)
|
||||
wdb_recorder.add_global_scalar(
|
||||
TotalRewardMean,
|
||||
optimizer.param_groups[0]["lr"],
|
||||
total_steps,
|
||||
)
|
||||
# print cost time as seconds
|
||||
print("cost time:", time.time() - start_time)
|
||||
# New Record!
|
||||
@ -785,6 +570,7 @@ if __name__ == "__main__":
|
||||
saveDir = "../PPO-Model/" + run_name +"_"+ str(TotalRewardMean) + ".pt"
|
||||
torch.save(agent, saveDir)
|
||||
else:
|
||||
# train mode off
|
||||
meanRewardList = [] # for WANDB
|
||||
# while not in training mode, clear the buffer
|
||||
for thisT in trainQueue:
|
||||
@ -804,14 +590,13 @@ if __name__ == "__main__":
|
||||
returns[thisT] = torch.tensor([]).to(device)
|
||||
|
||||
# record rewards for plotting purposes
|
||||
|
||||
writer.add_scalar(f"Target{targetName}/Reward", targetRewardMean, target_steps[thisT])
|
||||
writer.add_scalar(f"Target{targetName}/WinRatio", WinRounds[targetName]/TotalRounds[targetName], target_steps[thisT])
|
||||
wdb_recorder.writer.add_scalar(f"Target{targetName}/Reward", targetRewardMean, target_steps[thisT])
|
||||
wdb_recorder.add_win_ratio(targetName,target_steps[thisT])
|
||||
print(f"episode over Target{targetName} mean reward:", targetRewardMean)
|
||||
TotalRewardMean = np.mean(meanRewardList)
|
||||
writer.add_scalar("GlobalCharts/TotalRewardMean", TotalRewardMean, total_steps)
|
||||
wdb_recorder.writer.add_scalar("GlobalCharts/TotalRewardMean", TotalRewardMean, total_steps)
|
||||
|
||||
saveDir = "../PPO-Model/"+ run_name + "_last.pt"
|
||||
torch.save(agent, saveDir)
|
||||
env.close()
|
||||
writer.close()
|
||||
wdb_recorder.writer.close()
|
||||
|
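The net effect of the refactor on the training entry point, pulled together from the hunks above: PPOAgent, GAE, and AimbotSideChannel now come from ppoagent.py, logging goes through WandbRecorder from airecorder.py, and the agent is built with explicit keyword arguments instead of PPOAgent(env, TARGETNUM). A minimal wiring sketch of the new call sites follows; the environment path, ports, and state-size values are placeholders (the real constants live in the training script and their values are not shown in this diff).

# wiring sketch only; mirrors the call sites in the diff above, values are placeholders
import uuid
import torch

from aimbotEnv import Aimbot
from ppoagent import PPOAgent, GAE, AimbotSideChannel
from airecorder import WandbRecorder

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

side_channel = AimbotSideChannel(uuid.uuid4())  # the script uses a fixed SIDE_CHANNEL_UUID
env = Aimbot(envPath="../Build/Aimbot", workerID=1, basePort=1000,
             side_channels=[side_channel])      # path and ports are placeholders

agent = PPOAgent(
    env=env,
    trainAgent=True,        # args.train
    targetNum=4,            # TARGETNUM
    target_state_size=6,    # TARGET_STATE_SIZE (placeholder value)
    time_state_size=1,      # TIME_STATE_SIZE   (placeholder value)
    gun_state_size=1,       # GUN_STATE_SIZE    (placeholder value)
    my_state_size=4,        # MY_STATE_SIZE     (placeholder value)
    total_t_size=12,        # TOTAL_T_SIZE      (placeholder value)
).to(device)

# NOTE: per the commit message, checkpoints saved before this refactor
# are not compatible with the new PPOAgent.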
82  Aimbot-PPO-Python/Pytorch/airecorder.py  Normal file
@ -0,0 +1,82 @@
|
||||
import wandb
|
||||
import time
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
|
||||
|
||||
total_rounds = {"Free": 0, "Go": 0, "Attack": 0}
|
||||
win_rounds = {"Free": 0, "Go": 0, "Attack": 0}
|
||||
|
||||
|
||||
# class for wandb recording
|
||||
class WandbRecorder:
|
||||
def __init__(self, game_name: str, game_type: str, run_name: str, _args) -> None:
|
||||
# init wandb
|
||||
self.game_name = game_name
|
||||
self.game_type = game_type
|
||||
self._args = _args
|
||||
self.run_name = run_name
|
||||
if self._args.wandb_track:
|
||||
wandb.init(
|
||||
project=self.game_name,
|
||||
entity=self._args.wandb_entity,
|
||||
sync_tensorboard=True,
|
||||
config=vars(self._args),
|
||||
name=self.run_name,
|
||||
monitor_gym=True,
|
||||
save_code=True,
|
||||
)
|
||||
self.writer = SummaryWriter(f"runs/{self.run_name}")
|
||||
self.writer.add_text(
|
||||
"hyperparameters",
|
||||
"|param|value|\n|-|-|\n%s"
|
||||
% ("\n".join([f"|{key}|{value}|" for key, value in vars(self._args).items()])),
|
||||
)
|
||||
|
||||
def add_target_scalar(
|
||||
self,
|
||||
target_name,
|
||||
thisT,
|
||||
v_loss,
|
||||
dis_pg_loss,
|
||||
con_pg_loss,
|
||||
loss,
|
||||
entropy_loss,
|
||||
target_reward_mean,
|
||||
target_steps,
|
||||
):
|
||||
# fmt:off
|
||||
self.writer.add_scalar(
|
||||
f"Target{target_name}/value_loss", v_loss.item(), target_steps[thisT]
|
||||
)
|
||||
self.writer.add_scalar(
|
||||
f"Target{target_name}/dis_policy_loss", dis_pg_loss.item(), target_steps[thisT]
|
||||
)
|
||||
self.writer.add_scalar(
|
||||
f"Target{target_name}/con_policy_loss", con_pg_loss.item(), target_steps[thisT]
|
||||
)
|
||||
self.writer.add_scalar(
|
||||
f"Target{target_name}/total_loss", loss.item(), target_steps[thisT]
|
||||
)
|
||||
self.writer.add_scalar(
|
||||
f"Target{target_name}/entropy_loss", entropy_loss.item(), target_steps[thisT]
|
||||
)
|
||||
self.writer.add_scalar(
|
||||
f"Target{target_name}/Reward", target_reward_mean, target_steps[thisT]
|
||||
)
|
||||
self.writer.add_scalar(
|
||||
f"Target{target_name}/WinRatio", win_rounds[target_name] / total_rounds[target_name], target_steps[thisT],
|
||||
)
|
||||
# fmt:on
|
||||
|
||||
def add_global_scalar(
|
||||
self,
|
||||
total_reward_mean,
|
||||
learning_rate,
|
||||
total_steps,
|
||||
):
|
||||
self.writer.add_scalar("GlobalCharts/TotalRewardMean", total_reward_mean, total_steps)
|
||||
self.writer.add_scalar("GlobalCharts/learning_rate", learning_rate, total_steps)
|
||||
def add_win_ratio(self, target_name, target_steps):
|
||||
self.writer.add_scalar(
|
||||
f"Target{target_name}/WinRatio", win_rounds[target_name] / total_rounds[target_name], target_steps,
|
||||
)
|
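The training script above drives this class through a handful of calls. Below is a self-contained usage sketch that follows those call sites; the run name, the thisT index, and all metric values are dummies, and wandb tracking is switched off so only the TensorBoard writer is exercised.

# usage sketch of WandbRecorder, following the calls made in the training-script diff
import argparse
import torch
import airecorder
from airecorder import WandbRecorder

args = argparse.Namespace(wandb_track=False, wandb_entity=None)  # stand-in for parse_args()
recorder = WandbRecorder("Aimbot", "Go", "Go_demo_run", args)

# the side channel normally increments these counters when a round finishes
airecorder.total_rounds["Go"] += 1
airecorder.win_rounds["Go"] += 1

dummy_loss = torch.tensor(0.0)
target_steps = [0, 0, 0, 0]
thisT = 1  # index of the "Go" target in the script's Targets enum (illustrative)
recorder.add_target_scalar("Go", thisT, dummy_loss, dummy_loss, dummy_loss,
                           dummy_loss, dummy_loss, 0.5, target_steps)
recorder.add_win_ratio("Go", target_steps[thisT])
recorder.add_global_scalar(total_reward_mean=0.5, learning_rate=2.5e-4, total_steps=0)
recorder.writer.close()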
267  Aimbot-PPO-Python/Pytorch/ppoagent.py  Normal file
@ -0,0 +1,267 @@
|
||||
import numpy as np
|
||||
import torch
|
||||
import uuid
|
||||
import airecorder
|
||||
from torch import nn
|
||||
from typing import List
|
||||
from aimbotEnv import Aimbot
|
||||
from torch.distributions.normal import Normal
|
||||
from torch.distributions.categorical import Categorical
|
||||
from mlagents_envs.side_channel.side_channel import (
|
||||
SideChannel,
|
||||
IncomingMessage,
|
||||
OutgoingMessage,
|
||||
)
|
||||
|
||||
|
||||
def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
|
||||
nn.init.orthogonal_(layer.weight, std)
|
||||
nn.init.constant_(layer.bias, bias_const)
|
||||
return layer
|
||||
|
||||
|
||||
class PPOAgent(nn.Module):
|
||||
def __init__(
|
||||
self,
|
||||
env: Aimbot,
|
||||
trainAgent: bool,
|
||||
targetNum: int,
|
||||
target_state_size: int,
|
||||
time_state_size: int,
|
||||
gun_state_size: int,
|
||||
my_state_size: int,
|
||||
total_t_size: int,
|
||||
):
|
||||
super(PPOAgent, self).__init__()
|
||||
self.trainAgent = trainAgent
|
||||
self.targetNum = targetNum
|
||||
self.stateSize = env.unity_observation_shape[0]
|
||||
self.agentNum = env.unity_agent_num
|
||||
self.targetSize = target_state_size
|
||||
self.timeSize = time_state_size
|
||||
self.gunSize = gun_state_size
|
||||
self.myStateSize = my_state_size
|
||||
self.raySize = env.unity_observation_shape[0] - total_t_size
|
||||
self.nonRaySize = total_t_size
|
||||
self.head_input_size = (
|
||||
env.unity_observation_shape[0] - self.targetSize - self.timeSize - self.gunSize
|
||||
) # except target state input
|
||||
|
||||
self.unityDiscreteType = env.unity_discrete_type
|
||||
self.discrete_size = env.unity_discrete_size
|
||||
self.discrete_shape = list(env.unity_discrete_branches)
|
||||
self.continuous_size = env.unity_continuous_size
|
||||
|
||||
self.viewNetwork = nn.Sequential(layer_init(nn.Linear(self.raySize, 200)), nn.LeakyReLU())
|
||||
self.targetNetworks = nn.ModuleList(
|
||||
[
|
||||
nn.Sequential(layer_init(nn.Linear(self.nonRaySize, 100)), nn.LeakyReLU())
|
||||
for i in range(targetNum)
|
||||
]
|
||||
)
|
||||
self.middleNetworks = nn.ModuleList(
|
||||
[
|
||||
nn.Sequential(layer_init(nn.Linear(300, 200)), nn.LeakyReLU())
|
||||
for i in range(targetNum)
|
||||
]
|
||||
)
|
||||
self.actor_dis = nn.ModuleList(
|
||||
[layer_init(nn.Linear(200, self.discrete_size), std=0.5) for i in range(targetNum)]
|
||||
)
|
||||
self.actor_mean = nn.ModuleList(
|
||||
[layer_init(nn.Linear(200, self.continuous_size), std=0.5) for i in range(targetNum)]
|
||||
)
|
||||
# self.actor_logstd = nn.ModuleList([layer_init(nn.Linear(200, self.continuous_size), std=1) for i in range(targetNum)])
|
||||
# self.actor_logstd = nn.Parameter(torch.zeros(1, self.continuous_size))
|
||||
self.actor_logstd = nn.ParameterList(
|
||||
[nn.Parameter(torch.zeros(1, self.continuous_size)) for i in range(targetNum)]
|
||||
) # nn.Parameter(torch.zeros(1, self.continuous_size))
|
||||
self.critic = nn.ModuleList(
|
||||
[layer_init(nn.Linear(200, 1), std=1) for i in range(targetNum)]
|
||||
)
|
||||
|
||||
def get_value(self, state: torch.Tensor):
|
||||
target = state[:, 0].to(torch.int32) # int
|
||||
thisStateNum = target.size()[0]
|
||||
viewInput = state[:, -self.raySize :] # all ray input
|
||||
targetInput = state[:, : self.nonRaySize]
|
||||
viewLayer = self.viewNetwork(viewInput)
|
||||
targetLayer = torch.stack(
|
||||
[self.targetNetworks[target[i]](targetInput[i]) for i in range(thisStateNum)]
|
||||
)
|
||||
middleInput = torch.cat([viewLayer, targetLayer], dim=1)
|
||||
middleLayer = torch.stack(
|
||||
[self.middleNetworks[target[i]](middleInput[i]) for i in range(thisStateNum)]
|
||||
)
|
||||
criticV = torch.stack(
|
||||
[self.critic[target[i]](middleLayer[i]) for i in range(thisStateNum)]
|
||||
) # self.critic
|
||||
return criticV
|
||||
|
||||
def get_actions_value(self, state: torch.Tensor, actions=None):
|
||||
target = state[:, 0].to(torch.int32) # int
|
||||
thisStateNum = target.size()[0]
|
||||
viewInput = state[:, -self.raySize :] # all ray input
|
||||
targetInput = state[:, : self.nonRaySize]
|
||||
viewLayer = self.viewNetwork(viewInput)
|
||||
targetLayer = torch.stack(
|
||||
[self.targetNetworks[target[i]](targetInput[i]) for i in range(thisStateNum)]
|
||||
)
|
||||
middleInput = torch.cat([viewLayer, targetLayer], dim=1)
|
||||
middleLayer = torch.stack(
|
||||
[self.middleNetworks[target[i]](middleInput[i]) for i in range(thisStateNum)]
|
||||
)
|
||||
|
||||
# discrete
|
||||
# iterate over the targets (i.e. over the agents) so each sample is routed through the output network that matches its target
|
||||
dis_logits = torch.stack(
|
||||
[self.actor_dis[target[i]](middleLayer[i]) for i in range(thisStateNum)]
|
||||
)
|
||||
split_logits = torch.split(dis_logits, self.discrete_shape, dim=1)
|
||||
multi_categoricals = [Categorical(logits=thisLogits) for thisLogits in split_logits]
|
||||
# continuous
|
||||
actions_mean = torch.stack(
|
||||
[self.actor_mean[target[i]](middleLayer[i]) for i in range(thisStateNum)]
|
||||
) # self.actor_mean(hidden)
|
||||
# action_logstd = torch.stack([self.actor_logstd[target[i]](middleLayer[i]) for i in range(thisStateNum)]) # self.actor_logstd(hidden)
|
||||
# action_logstd = self.actor_logstd.expand_as(actions_mean) # self.actor_logstd.expand_as(actions_mean)
|
||||
action_logstd = torch.stack(
|
||||
[torch.squeeze(self.actor_logstd[target[i]], 0) for i in range(thisStateNum)]
|
||||
)
|
||||
# print(action_logstd)
|
||||
action_std = torch.exp(action_logstd) # torch.exp(action_logstd)
|
||||
con_probs = Normal(actions_mean, action_std)
|
||||
# critic
|
||||
criticV = torch.stack(
|
||||
[self.critic[target[i]](middleLayer[i]) for i in range(thisStateNum)]
|
||||
) # self.critic
|
||||
|
||||
if actions is None:
|
||||
if self.trainAgent:
|
||||
# select actions based on the probability distributions
|
||||
disAct = torch.stack([ctgr.sample() for ctgr in multi_categoricals])
|
||||
conAct = con_probs.sample()
|
||||
actions = torch.cat([disAct.T, conAct], dim=1)
|
||||
else:
|
||||
# select actions based on the most probable outputs of the distributions
|
||||
# disAct = torch.stack([torch.argmax(logit, dim=1) for logit in split_logits])
|
||||
conAct = actions_mean
|
||||
disAct = torch.stack([ctgr.sample() for ctgr in multi_categoricals])
|
||||
conAct = con_probs.sample()
|
||||
actions = torch.cat([disAct.T, conAct], dim=1)
|
||||
else:
|
||||
disAct = actions[:, 0 : self.unityDiscreteType].T
|
||||
conAct = actions[:, self.unityDiscreteType :]
|
||||
dis_log_prob = torch.stack(
|
||||
[ctgr.log_prob(act) for act, ctgr in zip(disAct, multi_categoricals)]
|
||||
)
|
||||
dis_entropy = torch.stack([ctgr.entropy() for ctgr in multi_categoricals])
|
||||
return (
|
||||
actions,
|
||||
dis_log_prob.sum(0),
|
||||
dis_entropy.sum(0),
|
||||
con_probs.log_prob(conAct).sum(1),
|
||||
con_probs.entropy().sum(1),
|
||||
criticV,
|
||||
)
|
||||
|
||||
|
||||
def GAE(agent, args, rewards, dones, values, next_obs, next_done, device):
|
||||
# GAE
|
||||
with torch.no_grad():
|
||||
next_value = agent.get_value(next_obs).reshape(1, -1)
|
||||
data_size = rewards.size()[0]
|
||||
if args.gae:
|
||||
advantages = torch.zeros_like(rewards).to(device)
|
||||
lastgaelam = 0
|
||||
for t in reversed(range(data_size)):
|
||||
if t == data_size - 1:
|
||||
nextnonterminal = 1.0 - next_done
|
||||
nextvalues = next_value
|
||||
else:
|
||||
nextnonterminal = 1.0 - dones[t + 1]
|
||||
nextvalues = values[t + 1]
|
||||
delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t]
|
||||
advantages[t] = lastgaelam = (
|
||||
delta + args.gamma * args.gaeLambda * nextnonterminal * lastgaelam
|
||||
)
|
||||
returns = advantages + values
|
||||
else:
|
||||
returns = torch.zeros_like(rewards).to(device)
|
||||
for t in reversed(range(data_size)):
|
||||
if t == data_size - 1:
|
||||
nextnonterminal = 1.0 - next_done
|
||||
next_return = next_value
|
||||
else:
|
||||
nextnonterminal = 1.0 - dones[t + 1]
|
||||
next_return = returns[t + 1]
|
||||
returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return
|
||||
advantages = returns - values
|
||||
return advantages, returns
|
||||
|
||||
|
||||
class AimbotSideChannel(SideChannel):
|
||||
def __init__(self, channel_id: uuid.UUID) -> None:
|
||||
super().__init__(channel_id)
|
||||
|
||||
def on_message_received(self, msg: IncomingMessage) -> None:
|
||||
global SCrecieved # make sure this variable is global
|
||||
"""
|
||||
Note: We must implement this method of the SideChannel interface to
|
||||
receive messages from Unity
|
||||
Message will be sent like this:
|
||||
"Warning|Message1|Message2|Message3" or
|
||||
"Error|Message1|Message2|Message3"
|
||||
"""
|
||||
thisMessage = msg.read_string()
|
||||
thisResult = thisMessage.split("|")
|
||||
if(thisResult[0] == "result"):
|
||||
airecorder.total_rounds[thisResult[1]]+=1
|
||||
if(thisResult[2] == "Win"):
|
||||
airecorder.win_rounds[thisResult[1]]+=1
|
||||
#print(TotalRounds)
|
||||
#print(WinRounds)
|
||||
elif(thisResult[0] == "Error"):
|
||||
print(thisMessage)
|
||||
|
||||
# # while Message type is Warning
|
||||
# if(thisResult[0] == "Warning"):
|
||||
# # while Message1 is result means one game is over
|
||||
# if (thisResult[1] == "Result"):
|
||||
# TotalRounds[thisResult[2]]+=1
|
||||
# # while Message3 is Win means this agent win this game
|
||||
# if(thisResult[3] == "Win"):
|
||||
# WinRounds[thisResult[2]]+=1
|
||||
# # while Message1 is GameState means this game is just start
|
||||
# # and tell python which game mode is
|
||||
# elif (thisResult[1] == "GameState"):
|
||||
# SCrecieved = 1
|
||||
# # while Message type is Error
|
||||
# elif(thisResult[0] == "Error"):
|
||||
# print(thisMessage)
|
||||
# message-sending helpers
|
||||
def send_string(self, data: str) -> None:
|
||||
# send a string to C#
|
||||
msg = OutgoingMessage()
|
||||
msg.write_string(data)
|
||||
super().queue_message_to_send(msg)
|
||||
|
||||
def send_bool(self, data: bool) -> None:
|
||||
msg = OutgoingMessage()
|
||||
msg.write_bool(data)
|
||||
super().queue_message_to_send(msg)
|
||||
|
||||
def send_int(self, data: int) -> None:
|
||||
msg = OutgoingMessage()
|
||||
msg.write_int32(data)
|
||||
super().queue_message_to_send(msg)
|
||||
|
||||
def send_float(self, data: float) -> None:
|
||||
msg = OutgoingMessage()
|
||||
msg.write_float32(data)
|
||||
super().queue_message_to_send(msg)
|
||||
|
||||
def send_float_list(self, data: List[float]) -> None:
|
||||
msg = OutgoingMessage()
|
||||
msg.write_float32_list(data)
|
||||
super().queue_message_to_send(msg)
|
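For reference, the GAE helper defined in ppoagent.py above implements the standard generalized advantage estimation recursion; writing it out makes the loop easier to check (r_t is the reward, V the critic value, d_{t+1} the done flag, \gamma = args.gamma, \lambda = args.gaeLambda):

    \delta_t = r_t + \gamma (1 - d_{t+1}) V(s_{t+1}) - V(s_t)
    \hat{A}_t = \delta_t + \gamma \lambda (1 - d_{t+1}) \hat{A}_{t+1}
    R_t = \hat{A}_t + V(s_t)

The bootstrap value V(s_{T+1}) comes from agent.get_value(next_obs). When args.gae is disabled, the helper falls back to plain discounted returns R_t = r_t + \gamma (1 - d_{t+1}) R_{t+1} with \hat{A}_t = R_t - V(s_t).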
@ -107,6 +107,40 @@
|
||||
")\n",
|
||||
"from typing import List\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "AttributeError",
|
||||
"evalue": "'aaa' object has no attribute 'outa'",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[1;32mIn[5], line 14\u001b[0m\n\u001b[0;32m 12\u001b[0m asd \u001b[39m=\u001b[39m aaa(outa, outb)\n\u001b[0;32m 13\u001b[0m asd\u001b[39m.\u001b[39mfunc()\n\u001b[1;32m---> 14\u001b[0m \u001b[39mprint\u001b[39m(asd\u001b[39m.\u001b[39;49mouta) \u001b[39m# 输出 100\u001b[39;00m\n",
|
||||
"\u001b[1;31mAttributeError\u001b[0m: 'aaa' object has no attribute 'outa'"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"class aaa():\n",
|
||||
" def __init__(self, a, b):\n",
|
||||
" self.a = a\n",
|
||||
" self.b = b\n",
|
||||
"\n",
|
||||
" def func(self):\n",
|
||||
" global outa\n",
|
||||
" outa = 100\n",
|
||||
"\n",
|
||||
"outa = 1\n",
|
||||
"outb = 2\n",
|
||||
"asd = aaa(outa, outb)\n",
|
||||
"asd.func()\n",
|
||||
"print(asd.outa) # 输出 100"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@ -125,7 +159,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.9.17"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
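A side note on the AttributeError captured in the notebook diff above: it is the expected outcome, because func rebinds the module-level name outa through the global statement and never creates an attribute on the instance, so asd.outa does not exist. A small illustrative variant (hypothetical code, not part of the notebook) showing the difference:

class Aaa:
    def __init__(self, a, b):
        self.a = a
        self.b = b

    def func(self):
        global outa       # rebinds the module-level name ...
        outa = 100
        self.outa = 100   # ... while this line creates an instance attribute

outa = 1
asd = Aaa(1, 2)
asd.func()
print(outa)      # 100, the module-level name was rebound
print(asd.outa)  # 100, the instance attribute now exists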
|
||||
|
@ -62,7 +62,6 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from mlagents_envs.environment import UnityEnvironment\n",
|
||||
"from gym_unity.envs import UnityToGymWrapper\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"ENV_PATH = \"../Build-ParallelEnv/Aimbot-ParallelEnv\"\n",
|
||||
@ -368,6 +367,7 @@
|
||||
],
|
||||
"source": [
|
||||
"import torch\n",
|
||||
"from torch import nn\n",
|
||||
"\n",
|
||||
"def layer_init(layer, std=np.sqrt(2), bias_const=0.0):\n",
|
||||
" torch.nn.init.orthogonal_(layer.weight, std)\n",
|
||||
@ -1248,6 +1248,24 @@
|
||||
"saveDir = \"C:/Users/UCUNI/OneDrive/Unity/ML-Agents/Aimbot-PPO/Aimbot-PPO-Python/PPO-Model/Chimera-1677965178-1678547500.pt\"\n",
|
||||
"torch.save(badGotoAgent,saveDir)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"True\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import torch\n",
|
||||
"print(torch.cuda.is_available())"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@ -1266,7 +1284,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.7"
|
||||
"version": "3.9.17"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||