Code cleanup; not compatible with previous models

Koha9 2023-07-15 20:37:23 +09:00
parent bee609d160
commit 177974888a
6 changed files with 451 additions and 262 deletions

.vscode/settings.json (vendored, +3)
View File

@@ -0,0 +1,3 @@
{
"python.linting.enabled": false
}

View File

@@ -1,5 +1,4 @@
import argparse
import wandb
import time
import numpy as np
import random
@@ -9,20 +8,14 @@ import torch.nn as nn
import torch.optim as optim
import atexit
from AimbotEnv import Aimbot
from tqdm import tqdm
from aimbotEnv import Aimbot
from ppoagent import PPOAgent
from ppoagent import GAE
from ppoagent import AimbotSideChannel
from airecorder import WandbRecorder
from enum import Enum
from torch.distributions.normal import Normal
from torch.distributions.categorical import Categorical
from distutils.util import strtobool
from torch.utils.tensorboard import SummaryWriter
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.side_channel import (
SideChannel,
IncomingMessage,
OutgoingMessage,
)
from typing import List
bestReward = -1
@@ -62,11 +55,11 @@ BROADCASTREWARD = False
ANNEAL_LEARNING_RATE = True
CLIP_VLOSS = True
NORM_ADV = False
TRAIN = False
TRAIN = True
SAVE_MODEL = False
WANDB_TACK = True
WANDB_TACK = False
LOAD_DIR = None
LOAD_DIR = "../PPO-Model/PList_Go_LeakyReLU_9331_1677965178_bestGoto/PList_Go_LeakyReLU_9331_1677965178_10.709002.pt"
#LOAD_DIR = "../PPO-Model/PList_Go_LeakyReLU_9331_1677965178_bestGoto/PList_Go_LeakyReLU_9331_1677965178_10.709002.pt"
# public data
class Targets(Enum):
@@ -86,8 +79,6 @@ BASE_LOSEREWARD = -999
TARGETNUM= 4
ENV_TIMELIMIT = 30
RESULT_BROADCAST_RATIO = 1/ENV_TIMELIMIT
TotalRounds = {"Free":0,"Go":0,"Attack":0}
WinRounds = {"Free":0,"Go":0,"Attack":0}
# !!!SPECIAL PARAMETERS!!!
# change this once the program is finished
@@ -168,215 +159,6 @@ def parse_args():
return args
def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
torch.nn.init.orthogonal_(layer.weight, std)
torch.nn.init.constant_(layer.bias, bias_const)
return layer
class PPOAgent(nn.Module):
def __init__(self, env: Aimbot,targetNum:int):
super(PPOAgent, self).__init__()
self.targetNum = targetNum
self.stateSize = env.unity_observation_shape[0]
self.agentNum = env.unity_agent_num
self.targetSize = TARGET_STATE_SIZE
self.timeSize = TIME_STATE_SIZE
self.gunSize = GUN_STATE_SIZE
self.myStateSize = MY_STATE_SIZE
self.raySize = env.unity_observation_shape[0] - TOTAL_T_SIZE
self.nonRaySize = TOTAL_T_SIZE
self.head_input_size = env.unity_observation_shape[0] - self.targetSize-self.timeSize-self.gunSize# except target state input
self.discrete_size = env.unity_discrete_size
self.discrete_shape = list(env.unity_discrete_branches)
self.continuous_size = env.unity_continuous_size
self.viewNetwork = nn.Sequential(
layer_init(nn.Linear(self.raySize, 200)),
nn.LeakyReLU()
)
self.targetNetworks = nn.ModuleList([nn.Sequential(
layer_init(nn.Linear(self.nonRaySize, 100)),
nn.LeakyReLU()
)for i in range(targetNum)])
self.middleNetworks = nn.ModuleList([nn.Sequential(
layer_init(nn.Linear(300,200)),
nn.LeakyReLU()
)for i in range(targetNum)])
self.actor_dis = nn.ModuleList([layer_init(nn.Linear(200, self.discrete_size), std=0.5) for i in range(targetNum)])
self.actor_mean = nn.ModuleList([layer_init(nn.Linear(200, self.continuous_size), std=0.5) for i in range(targetNum)])
# self.actor_logstd = nn.ModuleList([layer_init(nn.Linear(200, self.continuous_size), std=1) for i in range(targetNum)])
# self.actor_logstd = nn.Parameter(torch.zeros(1, self.continuous_size))
self.actor_logstd = nn.ParameterList([nn.Parameter(torch.zeros(1,self.continuous_size))for i in range(targetNum)]) # nn.Parameter(torch.zeros(1, self.continuous_size))
self.critic = nn.ModuleList([layer_init(nn.Linear(200, 1), std=1)for i in range(targetNum)])
def get_value(self, state: torch.Tensor):
target = state[:,0].to(torch.int32) # int
thisStateNum = target.size()[0]
viewInput = state[:,-self.raySize:] # all ray input
targetInput = state[:,:self.nonRaySize]
viewLayer = self.viewNetwork(viewInput)
targetLayer = torch.stack([self.targetNetworks[target[i]](targetInput[i]) for i in range(thisStateNum)])
middleInput = torch.cat([viewLayer,targetLayer],dim = 1)
middleLayer = torch.stack([self.middleNetworks[target[i]](middleInput[i]) for i in range(thisStateNum)])
criticV = torch.stack([self.critic[target[i]](middleLayer[i]) for i in range(thisStateNum)]) # self.critic
return criticV
def get_actions_value(self, state: torch.Tensor, actions=None):
target = state[:,0].to(torch.int32) # int
thisStateNum = target.size()[0]
viewInput = state[:,-self.raySize:] # all ray input
targetInput = state[:,:self.nonRaySize]
viewLayer = self.viewNetwork(viewInput)
targetLayer = torch.stack([self.targetNetworks[target[i]](targetInput[i]) for i in range(thisStateNum)])
middleInput = torch.cat([viewLayer,targetLayer],dim = 1)
middleLayer = torch.stack([self.middleNetworks[target[i]](middleInput[i]) for i in range(thisStateNum)])
# discrete
# loop over the targets (i.e. the number of agents) so that each agent is routed through the output network matching its target
dis_logits = torch.stack([self.actor_dis[target[i]](middleLayer[i]) for i in range(thisStateNum)])
split_logits = torch.split(dis_logits, self.discrete_shape, dim=1)
multi_categoricals = [Categorical(logits=thisLogits) for thisLogits in split_logits]
# continuous
actions_mean = torch.stack([self.actor_mean[target[i]](middleLayer[i]) for i in range(thisStateNum)]) # self.actor_mean(hidden)
# action_logstd = torch.stack([self.actor_logstd[target[i]](middleLayer[i]) for i in range(thisStateNum)]) # self.actor_logstd(hidden)
# action_logstd = self.actor_logstd.expand_as(actions_mean) # self.actor_logstd.expand_as(actions_mean)
action_logstd = torch.stack([torch.squeeze(self.actor_logstd[target[i]],0) for i in range(thisStateNum)])
# print(action_logstd)
action_std = torch.exp(action_logstd) # torch.exp(action_logstd)
con_probs = Normal(actions_mean, action_std)
# critic
criticV = torch.stack([self.critic[target[i]](middleLayer[i]) for i in range(thisStateNum)]) # self.critic
if actions is None:
if args.train:
# sample actions from the probability distributions
disAct = torch.stack([ctgr.sample() for ctgr in multi_categoricals])
conAct = con_probs.sample()
actions = torch.cat([disAct.T, conAct], dim=1)
else:
# select actions based on the best probability distribution
# disAct = torch.stack([torch.argmax(logit, dim=1) for logit in split_logits])
conAct = actions_mean
disAct = torch.stack([ctgr.sample() for ctgr in multi_categoricals])
conAct = con_probs.sample()
actions = torch.cat([disAct.T, conAct], dim=1)
else:
disAct = actions[:, 0 : env.unity_discrete_type].T
conAct = actions[:, env.unity_discrete_type :]
dis_log_prob = torch.stack(
[ctgr.log_prob(act) for act, ctgr in zip(disAct, multi_categoricals)]
)
dis_entropy = torch.stack([ctgr.entropy() for ctgr in multi_categoricals])
return (
actions,
dis_log_prob.sum(0),
dis_entropy.sum(0),
con_probs.log_prob(conAct).sum(1),
con_probs.entropy().sum(1),
criticV,
)
def GAE(agent, args, rewards, dones, values, next_obs, next_done):
# GAE
with torch.no_grad():
next_value = agent.get_value(next_obs).reshape(1, -1)
data_size = rewards.size()[0]
if args.gae:
advantages = torch.zeros_like(rewards).to(device)
lastgaelam = 0
for t in reversed(range(data_size)):
if t == data_size - 1:
nextnonterminal = 1.0 - next_done
nextvalues = next_value
else:
nextnonterminal = 1.0 - dones[t + 1]
nextvalues = values[t + 1]
delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t]
advantages[t] = lastgaelam = (
delta + args.gamma * args.gaeLambda * nextnonterminal * lastgaelam
)
returns = advantages + values
else:
returns = torch.zeros_like(rewards).to(device)
for t in reversed(range(data_size)):
if t == data_size - 1:
nextnonterminal = 1.0 - next_done
next_return = next_value
else:
nextnonterminal = 1.0 - dones[t + 1]
next_return = returns[t + 1]
returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return
advantages = returns - values
return advantages, returns
class AimbotSideChannel(SideChannel):
def __init__(self, channel_id: uuid.UUID) -> None:
super().__init__(channel_id)
def on_message_received(self, msg: IncomingMessage) -> None:
global SCrecieved # make sure this variable is global
"""
Note: We must implement this method of the SideChannel interface to
receive messages from Unity.
Messages are sent in the form:
"Warning|Message1|Message2|Message3" or
"Error|Message1|Message2|Message3"
"""
thisMessage = msg.read_string()
thisResult = thisMessage.split("|")
if(thisResult[0] == "result"):
TotalRounds[thisResult[1]]+=1
if(thisResult[2] == "Win"):
WinRounds[thisResult[1]]+=1
#print(TotalRounds)
#print(WinRounds)
elif(thisResult[0] == "Error"):
print(thisMessage)
# # while Message type is Warning
# if(thisResult[0] == "Warning"):
# # while Message1 is result means one game is over
# if (thisResult[1] == "Result"):
# TotalRounds[thisResult[2]]+=1
# # while Message3 is Win means this agent win this game
# if(thisResult[3] == "Win"):
# WinRounds[thisResult[2]]+=1
# # while Message1 is GameState means this game is just start
# # and tell python which game mode is
# elif (thisResult[1] == "GameState"):
# SCrecieved = 1
# # while Message type is Error
# elif(thisResult[0] == "Error"):
# print(thisMessage)
# sender methods
def send_string(self, data: str) -> None:
# send a string to C#
msg = OutgoingMessage()
msg.write_string(data)
super().queue_message_to_send(msg)
def send_bool(self, data: bool) -> None:
msg = OutgoingMessage()
msg.write_bool(data)
super().queue_message_to_send(msg)
def send_int(self, data: int) -> None:
msg = OutgoingMessage()
msg.write_int32(data)
super().queue_message_to_send(msg)
def send_float(self, data: float) -> None:
msg = OutgoingMessage()
msg.write_float32(data)
super().queue_message_to_send(msg)
def send_float_list(self, data: List[float]) -> None:
msg = OutgoingMessage()
msg.write_float32_list(data)
super().queue_message_to_send(msg)
def broadCastEndReward(rewardBF:list,remainTime:float):
thisRewardBF = rewardBF
if (rewardBF[-1]<=-500):
@@ -404,7 +186,16 @@ if __name__ == "__main__":
aimBotsideChannel = AimbotSideChannel(SIDE_CHANNEL_UUID)
env = Aimbot(envPath=args.path, workerID=args.workerID, basePort=args.baseport,side_channels=[aimBotsideChannel])
if args.load_dir is None:
agent = PPOAgent(env,TARGETNUM).to(device)
agent = PPOAgent(
env = env,
trainAgent=args.train,
targetNum=TARGETNUM,
target_state_size= TARGET_STATE_SIZE,
time_state_size=TIME_STATE_SIZE,
gun_state_size=GUN_STATE_SIZE,
my_state_size=MY_STATE_SIZE,
total_t_size=TOTAL_T_SIZE,
).to(device)
else:
agent = torch.load(args.load_dir)
# freeze
@@ -420,23 +211,7 @@ if __name__ == "__main__":
# Tensorboard and WandB Recorder
run_name = f"{game_type}_{args.seed}_{int(time.time())}"
if args.wandb_track:
wandb.init(
project=game_name,
entity=args.wandb_entity,
sync_tensorboard=True,
config=vars(args),
name=run_name,
monitor_gym=True,
save_code=True,
)
writer = SummaryWriter(f"runs/{run_name}")
writer.add_text(
"hyperparameters",
"|param|value|\n|-|-|\n%s"
% ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])),
)
wdb_recorder = WandbRecorder(game_name, game_type, run_name, args)
@atexit.register
def save_model():
@@ -538,6 +313,7 @@ if __name__ == "__main__":
torch.tensor(values_bf[i]).to(device),
torch.tensor(next_state[i]).to(device).unsqueeze(0),
torch.Tensor([next_done[i]]).to(device),
device,
)
# send memories to training datasets
obs[roundTargetType] = torch.cat((obs[roundTargetType], torch.tensor(ob_bf[i]).to(device)), 0)
@@ -599,6 +375,7 @@ if __name__ == "__main__":
torch.tensor(values_bf[i]).to(device),
torch.Tensor(next_state[i]).to(device).unsqueeze(dim = 0),
torch.Tensor([next_done[i]]).to(device),
device
)
# send memories to training datasets
obs[roundTargetType] = torch.cat((obs[roundTargetType], torch.tensor(ob_bf[i]).to(device)), 0)
@@ -629,6 +406,7 @@ if __name__ == "__main__":
i += 1
if args.train:
# train mode on
meanRewardList = [] # for WANDB
# loop over all training queues
for thisT in trainQueue:
@@ -766,17 +544,24 @@ if __name__ == "__main__":
returns[thisT] = torch.tensor([]).to(device)
# record rewards for plotting purposes
writer.add_scalar(f"Target{targetName}/value_loss", v_loss.item(), target_steps[thisT])
writer.add_scalar(f"Target{targetName}/dis_policy_loss", dis_pg_loss.item(), target_steps[thisT])
writer.add_scalar(f"Target{targetName}/con_policy_loss", con_pg_loss.item(), target_steps[thisT])
writer.add_scalar(f"Target{targetName}/total_loss", loss.item(), target_steps[thisT])
writer.add_scalar(f"Target{targetName}/entropy_loss", entropy_loss.item(), target_steps[thisT])
writer.add_scalar(f"Target{targetName}/Reward", targetRewardMean, target_steps[thisT])
writer.add_scalar(f"Target{targetName}/WinRatio", WinRounds[targetName]/TotalRounds[targetName], target_steps[thisT])
wdb_recorder.add_target_scalar(
targetName,
thisT,
v_loss,
dis_pg_loss,
con_pg_loss,
loss,
entropy_loss,
targetRewardMean,
target_steps,
)
print(f"episode over Target{targetName} mean reward:", targetRewardMean)
TotalRewardMean = np.mean(meanRewardList)
writer.add_scalar("GlobalCharts/TotalRewardMean", TotalRewardMean, total_steps)
writer.add_scalar("GlobalCharts/learning_rate", optimizer.param_groups[0]["lr"], total_steps)
wdb_recorder.add_global_scalar(
TotalRewardMean,
optimizer.param_groups[0]["lr"],
total_steps,
)
# print cost time as seconds
print("cost time:", time.time() - start_time)
# New Record!
@@ -785,6 +570,7 @@ if __name__ == "__main__":
saveDir = "../PPO-Model/" + run_name +"_"+ str(TotalRewardMean) + ".pt"
torch.save(agent, saveDir)
else:
# train mode off
meanRewardList = [] # for WANDB
# while not in training mode, clear the buffer
for thisT in trainQueue:
@@ -804,14 +590,13 @@ if __name__ == "__main__":
returns[thisT] = torch.tensor([]).to(device)
# record rewards for plotting purposes
writer.add_scalar(f"Target{targetName}/Reward", targetRewardMean, target_steps[thisT])
writer.add_scalar(f"Target{targetName}/WinRatio", WinRounds[targetName]/TotalRounds[targetName], target_steps[thisT])
wdb_recorder.writer.add_scalar(f"Target{targetName}/Reward", targetRewardMean, target_steps[thisT])
wdb_recorder.add_win_ratio(targetName,target_steps[thisT])
print(f"episode over Target{targetName} mean reward:", targetRewardMean)
TotalRewardMean = np.mean(meanRewardList)
writer.add_scalar("GlobalCharts/TotalRewardMean", TotalRewardMean, total_steps)
wdb_recorder.writer.add_scalar("GlobalCharts/TotalRewardMean", TotalRewardMean, total_steps)
saveDir = "../PPO-Model/"+ run_name + "_last.pt"
torch.save(agent, saveDir)
env.close()
writer.close()
wdb_recorder.writer.close()

View File

@@ -0,0 +1,82 @@
import wandb
import time
from torch.utils.tensorboard import SummaryWriter
total_rounds = {"Free": 0, "Go": 0, "Attack": 0}
win_rounds = {"Free": 0, "Go": 0, "Attack": 0}
# class for wandb recording
class WandbRecorder:
def __init__(self, game_name: str, game_type: str, run_name: str, _args) -> None:
# init wandb
self.game_name = game_name
self.game_type = game_type
self._args = _args
self.run_name = run_name
if self._args.wandb_track:
wandb.init(
project=self.game_name,
entity=self._args.wandb_entity,
sync_tensorboard=True,
config=vars(self._args),
name=self.run_name,
monitor_gym=True,
save_code=True,
)
self.writer = SummaryWriter(f"runs/{self.run_name}")
self.writer.add_text(
"hyperparameters",
"|param|value|\n|-|-|\n%s"
% ("\n".join([f"|{key}|{value}|" for key, value in vars(self._args).items()])),
)
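# With wandb_track enabled, sync_tensorboard=True makes wandb mirror every scalar
# written through self.writer below, so the recording methods only need the
# SummaryWriter calls.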
def add_target_scalar(
self,
target_name,
thisT,
v_loss,
dis_pg_loss,
con_pg_loss,
loss,
entropy_loss,
target_reward_mean,
target_steps,
):
# fmt:off
self.writer.add_scalar(
f"Target{target_name}/value_loss", v_loss.item(), target_steps[thisT]
)
self.writer.add_scalar(
f"Target{target_name}/dis_policy_loss", dis_pg_loss.item(), target_steps[thisT]
)
self.writer.add_scalar(
f"Target{target_name}/con_policy_loss", con_pg_loss.item(), target_steps[thisT]
)
self.writer.add_scalar(
f"Target{target_name}/total_loss", loss.item(), target_steps[thisT]
)
self.writer.add_scalar(
f"Target{target_name}/entropy_loss", entropy_loss.item(), target_steps[thisT]
)
self.writer.add_scalar(
f"Target{target_name}/Reward", target_reward_mean, target_steps[thisT]
)
self.writer.add_scalar(
f"Target{target_name}/WinRatio", win_rounds[target_name] / total_rounds[target_name], target_steps[thisT],
)
# fmt:on
def add_global_scalar(
self,
total_reward_mean,
learning_rate,
total_steps,
):
self.writer.add_scalar("GlobalCharts/TotalRewardMean", total_reward_mean, total_steps)
self.writer.add_scalar("GlobalCharts/learning_rate", learning_rate, total_steps)
def add_win_ratio(self, target_name, target_steps):
self.writer.add_scalar(
f"Target{target_name}/WinRatio", win_rounds[target_name] / total_rounds[target_name], target_steps,
)
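# Minimal usage sketch, mirroring how the training script in this commit drives the
# recorder (game_name, args, v_loss, target_steps and so on come from that script):
#   wdb_recorder = WandbRecorder(game_name, game_type, run_name, args)
#   wdb_recorder.add_target_scalar(targetName, thisT, v_loss, dis_pg_loss, con_pg_loss,
#                                  loss, entropy_loss, targetRewardMean, target_steps)
#   wdb_recorder.add_global_scalar(TotalRewardMean, optimizer.param_groups[0]["lr"], total_steps)
#   wdb_recorder.add_win_ratio(targetName, target_steps[thisT])
#   wdb_recorder.writer.close()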

View File

@@ -0,0 +1,267 @@
import numpy as np
import torch
import uuid
import airecorder
from torch import nn
from typing import List
from aimbotEnv import Aimbot
from torch.distributions.normal import Normal
from torch.distributions.categorical import Categorical
from mlagents_envs.side_channel.side_channel import (
SideChannel,
IncomingMessage,
OutgoingMessage,
)
def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
nn.init.orthogonal_(layer.weight, std)
nn.init.constant_(layer.bias, bias_const)
return layer
class PPOAgent(nn.Module):
def __init__(
self,
env: Aimbot,
trainAgent: bool,
targetNum: int,
target_state_size: int,
time_state_size: int,
gun_state_size: int,
my_state_size: int,
total_t_size: int,
):
super(PPOAgent, self).__init__()
self.trainAgent = trainAgent
self.targetNum = targetNum
self.stateSize = env.unity_observation_shape[0]
self.agentNum = env.unity_agent_num
self.targetSize = target_state_size
self.timeSize = time_state_size
self.gunSize = gun_state_size
self.myStateSize = my_state_size
self.raySize = env.unity_observation_shape[0] - total_t_size
self.nonRaySize = total_t_size
self.head_input_size = (
env.unity_observation_shape[0] - self.targetSize - self.timeSize - self.gunSize
) # except target state input
self.unityDiscreteType = env.unity_discrete_type
self.discrete_size = env.unity_discrete_size
self.discrete_shape = list(env.unity_discrete_branches)
self.continuous_size = env.unity_continuous_size
self.viewNetwork = nn.Sequential(layer_init(nn.Linear(self.raySize, 200)), nn.LeakyReLU())
self.targetNetworks = nn.ModuleList(
[
nn.Sequential(layer_init(nn.Linear(self.nonRaySize, 100)), nn.LeakyReLU())
for i in range(targetNum)
]
)
self.middleNetworks = nn.ModuleList(
[
nn.Sequential(layer_init(nn.Linear(300, 200)), nn.LeakyReLU())
for i in range(targetNum)
]
)
self.actor_dis = nn.ModuleList(
[layer_init(nn.Linear(200, self.discrete_size), std=0.5) for i in range(targetNum)]
)
self.actor_mean = nn.ModuleList(
[layer_init(nn.Linear(200, self.continuous_size), std=0.5) for i in range(targetNum)]
)
# self.actor_logstd = nn.ModuleList([layer_init(nn.Linear(200, self.continuous_size), std=1) for i in range(targetNum)])
# self.actor_logstd = nn.Parameter(torch.zeros(1, self.continuous_size))
self.actor_logstd = nn.ParameterList(
[nn.Parameter(torch.zeros(1, self.continuous_size)) for i in range(targetNum)]
) # nn.Parameter(torch.zeros(1, self.continuous_size))
self.critic = nn.ModuleList(
[layer_init(nn.Linear(200, 1), std=1) for i in range(targetNum)]
)
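# Data flow per agent: the ray observations go through the shared viewNetwork
# (raySize -> 200); the non-ray state (target id, time, gun and self state) goes
# through the per-target network selected by state[:, 0] (nonRaySize -> 100); both
# are concatenated (300) into the per-target middle network (300 -> 200), whose output
# feeds the per-target discrete/continuous actor heads and the per-target critic.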
def get_value(self, state: torch.Tensor):
target = state[:, 0].to(torch.int32) # int
thisStateNum = target.size()[0]
viewInput = state[:, -self.raySize :] # all ray input
targetInput = state[:, : self.nonRaySize]
viewLayer = self.viewNetwork(viewInput)
targetLayer = torch.stack(
[self.targetNetworks[target[i]](targetInput[i]) for i in range(thisStateNum)]
)
middleInput = torch.cat([viewLayer, targetLayer], dim=1)
middleLayer = torch.stack(
[self.middleNetworks[target[i]](middleInput[i]) for i in range(thisStateNum)]
)
criticV = torch.stack(
[self.critic[target[i]](middleLayer[i]) for i in range(thisStateNum)]
) # self.critic
return criticV
def get_actions_value(self, state: torch.Tensor, actions=None):
target = state[:, 0].to(torch.int32) # int
thisStateNum = target.size()[0]
viewInput = state[:, -self.raySize :] # all ray input
targetInput = state[:, : self.nonRaySize]
viewLayer = self.viewNetwork(viewInput)
targetLayer = torch.stack(
[self.targetNetworks[target[i]](targetInput[i]) for i in range(thisStateNum)]
)
middleInput = torch.cat([viewLayer, targetLayer], dim=1)
middleLayer = torch.stack(
[self.middleNetworks[target[i]](middleInput[i]) for i in range(thisStateNum)]
)
# discrete
# loop over the targets (i.e. the number of agents) so that each agent is routed through the output network matching its target
dis_logits = torch.stack(
[self.actor_dis[target[i]](middleLayer[i]) for i in range(thisStateNum)]
)
split_logits = torch.split(dis_logits, self.discrete_shape, dim=1)
multi_categoricals = [Categorical(logits=thisLogits) for thisLogits in split_logits]
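# discrete_shape holds the branch sizes of the MultiDiscrete action space: the stacked
# logits are split per branch and modelled as independent Categorical distributions,
# whose per-branch log-probs and entropies are summed in the return value below.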
# continuous
actions_mean = torch.stack(
[self.actor_mean[target[i]](middleLayer[i]) for i in range(thisStateNum)]
) # self.actor_mean(hidden)
# action_logstd = torch.stack([self.actor_logstd[target[i]](middleLayer[i]) for i in range(thisStateNum)]) # self.actor_logstd(hidden)
# action_logstd = self.actor_logstd.expand_as(actions_mean) # self.actor_logstd.expand_as(actions_mean)
action_logstd = torch.stack(
[torch.squeeze(self.actor_logstd[target[i]], 0) for i in range(thisStateNum)]
)
# print(action_logstd)
action_std = torch.exp(action_logstd) # torch.exp(action_logstd)
con_probs = Normal(actions_mean, action_std)
# critic
criticV = torch.stack(
[self.critic[target[i]](middleLayer[i]) for i in range(thisStateNum)]
) # self.critic
if actions is None:
if self.trainAgent:
# sample actions from the probability distributions
disAct = torch.stack([ctgr.sample() for ctgr in multi_categoricals])
conAct = con_probs.sample()
actions = torch.cat([disAct.T, conAct], dim=1)
else:
# select actions based on the best probability distribution (note: conAct is still overwritten by sampling below)
# disAct = torch.stack([torch.argmax(logit, dim=1) for logit in split_logits])
conAct = actions_mean
disAct = torch.stack([ctgr.sample() for ctgr in multi_categoricals])
conAct = con_probs.sample()
actions = torch.cat([disAct.T, conAct], dim=1)
else:
disAct = actions[:, 0 : self.unityDiscreteType].T
conAct = actions[:, self.unityDiscreteType :]
dis_log_prob = torch.stack(
[ctgr.log_prob(act) for act, ctgr in zip(disAct, multi_categoricals)]
)
dis_entropy = torch.stack([ctgr.entropy() for ctgr in multi_categoricals])
return (
actions,
dis_log_prob.sum(0),
dis_entropy.sum(0),
con_probs.log_prob(conAct).sum(1),
con_probs.entropy().sum(1),
criticV,
)
def GAE(agent, args, rewards, dones, values, next_obs, next_done, device):
# GAE
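# The loop below implements the standard GAE recursion:
#   delta_t = r_t + gamma * (1 - done_{t+1}) * V(s_{t+1}) - V(s_t)
#   A_t     = delta_t + gamma * gaeLambda * (1 - done_{t+1}) * A_{t+1}
# The else branch falls back to plain discounted returns, with advantages = returns - values.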
with torch.no_grad():
next_value = agent.get_value(next_obs).reshape(1, -1)
data_size = rewards.size()[0]
if args.gae:
advantages = torch.zeros_like(rewards).to(device)
lastgaelam = 0
for t in reversed(range(data_size)):
if t == data_size - 1:
nextnonterminal = 1.0 - next_done
nextvalues = next_value
else:
nextnonterminal = 1.0 - dones[t + 1]
nextvalues = values[t + 1]
delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t]
advantages[t] = lastgaelam = (
delta + args.gamma * args.gaeLambda * nextnonterminal * lastgaelam
)
returns = advantages + values
else:
returns = torch.zeros_like(rewards).to(device)
for t in reversed(range(data_size)):
if t == data_size - 1:
nextnonterminal = 1.0 - next_done
next_return = next_value
else:
nextnonterminal = 1.0 - dones[t + 1]
next_return = returns[t + 1]
returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return
advantages = returns - values
return advantages, returns
class AimbotSideChannel(SideChannel):
def __init__(self, channel_id: uuid.UUID) -> None:
super().__init__(channel_id)
def on_message_received(self, msg: IncomingMessage) -> None:
global SCrecieved # make sure this variable is global
"""
Note: We must implement this method of the SideChannel interface to
receive messages from Unity.
Messages are sent in the form:
"Warning|Message1|Message2|Message3" or
"Error|Message1|Message2|Message3"
"""
thisMessage = msg.read_string()
thisResult = thisMessage.split("|")
if(thisResult[0] == "result"):
airecorder.total_rounds[thisResult[1]]+=1
if(thisResult[2] == "Win"):
airecorder.win_rounds[thisResult[1]]+=1
#print(TotalRounds)
#print(WinRounds)
elif(thisResult[0] == "Error"):
print(thisMessage)
# # while Message type is Warning
# if(thisResult[0] == "Warning"):
# # while Message1 is result means one game is over
# if (thisResult[1] == "Result"):
# TotalRounds[thisResult[2]]+=1
# # while Message3 is Win means this agent win this game
# if(thisResult[3] == "Win"):
# WinRounds[thisResult[2]]+=1
# # while Message1 is GameState means this game is just start
# # and tell python which game mode is
# elif (thisResult[1] == "GameState"):
# SCrecieved = 1
# # while Message type is Error
# elif(thisResult[0] == "Error"):
# print(thisMessage)
# sender methods
def send_string(self, data: str) -> None:
# send a string to C#
msg = OutgoingMessage()
msg.write_string(data)
super().queue_message_to_send(msg)
def send_bool(self, data: bool) -> None:
msg = OutgoingMessage()
msg.write_bool(data)
super().queue_message_to_send(msg)
def send_int(self, data: int) -> None:
msg = OutgoingMessage()
msg.write_int32(data)
super().queue_message_to_send(msg)
def send_float(self, data: float) -> None:
msg = OutgoingMessage()
msg.write_float32(data)
super().queue_message_to_send(msg)
def send_float_list(self, data: List[float]) -> None:
msg = OutgoingMessage()
msg.write_float32_list(data)
super().queue_message_to_send(msg)
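# Rough wiring sketch, following the training script changed in this commit
# (SIDE_CHANNEL_UUID, args, TARGETNUM and device are defined there):
#   side_channel = AimbotSideChannel(SIDE_CHANNEL_UUID)
#   env = Aimbot(envPath=args.path, workerID=args.workerID, basePort=args.baseport,
#                side_channels=[side_channel])
#   agent = PPOAgent(env=env, trainAgent=args.train, targetNum=TARGETNUM, ...).to(device)
#   advantages, returns = GAE(agent, args, rewards, dones, values, next_obs, next_done, device)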

View File

@@ -107,6 +107,40 @@
")\n",
"from typing import List\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'aaa' object has no attribute 'outa'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[5], line 14\u001b[0m\n\u001b[0;32m 12\u001b[0m asd \u001b[39m=\u001b[39m aaa(outa, outb)\n\u001b[0;32m 13\u001b[0m asd\u001b[39m.\u001b[39mfunc()\n\u001b[1;32m---> 14\u001b[0m \u001b[39mprint\u001b[39m(asd\u001b[39m.\u001b[39;49mouta) \u001b[39m# 输出 100\u001b[39;00m\n",
"\u001b[1;31mAttributeError\u001b[0m: 'aaa' object has no attribute 'outa'"
]
}
],
"source": [
"class aaa():\n",
" def __init__(self, a, b):\n",
" self.a = a\n",
" self.b = b\n",
"\n",
" def func(self):\n",
" global outa\n",
" outa = 100\n",
"\n",
"outa = 1\n",
"outb = 2\n",
"asd = aaa(outa, outb)\n",
"asd.func()\n",
"print(asd.outa) # 输出 100"
]
}
],
"metadata": {
@@ -125,7 +159,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
"version": "3.9.17"
},
"orig_nbformat": 4
},

View File

@@ -62,7 +62,6 @@
"outputs": [],
"source": [
"from mlagents_envs.environment import UnityEnvironment\n",
"from gym_unity.envs import UnityToGymWrapper\n",
"import numpy as np\n",
"\n",
"ENV_PATH = \"../Build-ParallelEnv/Aimbot-ParallelEnv\"\n",
@@ -368,6 +367,7 @@
],
"source": [
"import torch\n",
"from torch import nn\n",
"\n",
"def layer_init(layer, std=np.sqrt(2), bias_const=0.0):\n",
" torch.nn.init.orthogonal_(layer.weight, std)\n",
@@ -1248,6 +1248,24 @@
"saveDir = \"C:/Users/UCUNI/OneDrive/Unity/ML-Agents/Aimbot-PPO/Aimbot-PPO-Python/PPO-Model/Chimera-1677965178-1678547500.pt\"\n",
"torch.save(badGotoAgent,saveDir)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n"
]
}
],
"source": [
"import torch\n",
"print(torch.cuda.is_available())"
]
}
],
"metadata": {
@@ -1266,7 +1284,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.9.17"
},
"orig_nbformat": 4,
"vscode": {