hybrid discrete-continuous action, save/load, convergence observed
Add discrete and continuous actions to the same NN model, plus model save and load. Reward is increasing and convergence was observed. These two models look good: Aimbot_9331_1667423213_hybrid_train2, Aimbot_9331_1667389873_hybrid
This commit is contained in:
parent 0dbe2013ae
commit 474032d1e8
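The commit adds model save/load alongside the hybrid action head. Below is a minimal, illustrative sketch (not the repository's exact code) of the whole-object pickle style that `agent = torch.load(args.load_dir)` in the diff implies, next to the more portable `state_dict` variant; `TinyAgent` and the file names are placeholders.

```python
import torch
import torch.nn as nn

class TinyAgent(nn.Module):  # placeholder stand-in for the real PPOAgent
    def __init__(self):
        super().__init__()
        self.net = nn.Linear(4, 2)

    def forward(self, x):
        return self.net(x)

agent = TinyAgent()

# Whole-object save/load, as `torch.load(args.load_dir)` in the diff suggests.
# Loading later requires the class definition to be importable from the same
# module path that was recorded at save time (see the notebook error below).
torch.save(agent, "agent_full.pt")
restored = torch.load("agent_full.pt")

# More portable alternative: save only the weights and rebuild the module.
torch.save(agent.state_dict(), "agent_weights.pt")
restored2 = TinyAgent()
restored2.load_state_dict(torch.load("agent_weights.pt"))
```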
@@ -48,6 +48,11 @@ class Aimbot(gym.Env):
self.unity_discrete_type = self.unity_action_spec.discrete_size
# environment discrete action type. int 3+3+2=8
self.unity_discrete_size = sum(self.unity_discrete_branches)
# environment total action size. int 3+2=5
self.unity_action_size = self.unity_discrete_type + self.unity_continuous_size
# ActionExistBool
self.unity_dis_act_exist = self.unity_discrete_type != 0
self.unity_con_act_exist = self.unity_continuous_size != 0

# AGENT SPECS
# all agents ID
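For reference, the size bookkeeping above works out as follows, assuming the branch sizes (3, 3, 2) and two continuous axes that the comments suggest; these concrete numbers are illustrative only.

```python
# Assumed example values; the real ones come from the Unity behavior's action spec.
unity_discrete_branches = (3, 3, 2)  # choices per discrete branch
unity_continuous_size = 2            # number of continuous axes

unity_discrete_type = len(unity_discrete_branches)               # 3 branches
unity_discrete_size = sum(unity_discrete_branches)               # 3+3+2 = 8 logits
unity_action_size = unity_discrete_type + unity_continuous_size  # 3+2 = 5 values per action

unity_dis_act_exist = unity_discrete_type != 0
unity_con_act_exist = unity_continuous_size != 0
print(unity_discrete_type, unity_discrete_size, unity_action_size)  # 3 8 5
```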
@@ -85,21 +90,23 @@ class Aimbot(gym.Env):
"""
# take action to environment
# return nextState, reward, done
if self.unity_discrete_size == 0:
# discrete action
if self.unity_dis_act_exist:
# create discrete action from actions list
discreteActions = actions[:, 0 : self.unity_discrete_type]
else:
# create empty discrete action
discreteActions = np.asarray([[0]])
# continuous action
if self.unity_con_act_exist:
# create continuous actions from actions list
continuousActions = actions[:, self.unity_discrete_type :]
else:
# create discrete action from actions list
discreteActions = actions[:, 0 : self.unity_discrete_size]
"""
if self.unity_continuous_size == 0:
# create empty continuous action
continuousActions = np.asanyarray([[0.0]])
else:
# create continuous actions from actions list
continuousActions = actions[:, self.unity_discrete_size :]
"""
continuousActions = np.asanyarray([[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]])

# Dummy continuous action
# continuousActions = np.asanyarray([[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]])
# create actionTuple
thisActionTuple = ActionTuple(continuous=continuousActions, discrete=discreteActions)
# take action to env

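The change to step() splits one flat per-agent action row into its discrete and continuous halves before building the ActionTuple. A NumPy-only sketch of that split, with assumed sizes and without the ML-Agents dependency:

```python
import numpy as np

unity_discrete_type = 3  # assumed number of discrete branches
n_agents = 8

# One flat row per agent: [branch0, branch1, branch2, cont0, cont1]
actions = np.array([[1, 2, 0, 0.3, -0.7]] * n_agents)

discrete_actions = actions[:, 0:unity_discrete_type].astype(np.int32)  # (8, 3)
continuous_actions = actions[:, unity_discrete_type:]                  # (8, 2)

# In the real step() these two arrays feed
# ActionTuple(continuous=continuous_actions, discrete=discrete_actions).
print(discrete_actions.shape, continuous_actions.shape)  # (8, 3) (8, 2)
```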
@@ -20,12 +20,12 @@ WORKER_ID = 1
BASE_PORT = 2002


LEARNING_RATE = 2e-3
LEARNING_RATE = 7e-4
GAMMA = 0.99
GAE_LAMBDA = 0.95
TOTAL_STEPS = 2000000
STEP_NUM = 128
MINIBATCH_NUM = 4
STEP_NUM = 256
MINIBATCH_NUM = 1
EPOCHS = 4
CLIP_COEF = 0.1
ENTROPY_COEF = 0.01
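With the new rollout settings, the batch arithmetic works out roughly as below; the agent count is an assumption here (the script derives it from the environment), as is the exact batch-size formula.

```python
STEP_NUM = 256     # steps per rollout (new value)
MINIBATCH_NUM = 1  # minibatches per epoch (new value)
agent_num = 8      # assumed number of parallel Unity agents

batch_size = STEP_NUM * agent_num             # 2048 transitions per update
minibatch_size = batch_size // MINIBATCH_NUM  # 2048: effectively one full-batch update per epoch
print(batch_size, minibatch_size)             # 2048 2048
```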
@@ -35,9 +35,13 @@ ANNEAL_LEARNING_RATE = True
CLIP_VLOSS = True
NORM_ADV = True

WANDB_TACK = True
LOAD_DIR = "../PPO-Model/SmallArea-256-128-hybrid.pt"


def parse_args():
# fmt: off
# pytorch and environment parameters
parser = argparse.ArgumentParser()
parser.add_argument("--seed", type=int, default=DEFAULT_SEED,
help="seed of the experiment")
@@ -54,6 +58,7 @@ def parse_args():
parser.add_argument("--total-timesteps", type=int, default=TOTAL_STEPS,
help="total timesteps of the experiments")

# model parameters
parser.add_argument("--stepNum", type=int, default=STEP_NUM,
help="the number of steps to run in each environment per policy rollout")
parser.add_argument("--minibatchesNum", type=int, default=MINIBATCH_NUM,
@@ -62,8 +67,13 @@ def parse_args():
help="the K epochs to update the policy")
parser.add_argument("--annealLR", type=lambda x: bool(strtobool(x)), default=ANNEAL_LEARNING_RATE, nargs="?", const=True,
help="Toggle learning rate annealing for policy and value networks")
parser.add_argument("--wandb-entity", type=str, default=None,
parser.add_argument("--wandb-track", type=lambda x: bool(strtobool(x)), default=WANDB_TACK, nargs="?", const=True,
help="track on the wandb")
parser.add_argument("--wandb-entity", type=str, default=WAND_ENTITY,
help="the entity (team) of wandb's project")
parser.add_argument("--load-dir", type=str, default=LOAD_DIR,
help="load model directory")

# GAE
parser.add_argument("--gae", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
help="Use GAE for advantage computation")
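The `type=lambda x: bool(strtobool(x))` pattern used for `--annealLR` and `--wandb-track` turns strings such as "true" or "0" into Python bools, while `nargs="?", const=True` lets the bare flag mean True. A stripped-down sketch:

```python
import argparse
from distutils.util import strtobool

parser = argparse.ArgumentParser()
# Same parsing trick as the diff: "--wandb-track", "--wandb-track true", "--wandb-track 0" all work.
parser.add_argument("--wandb-track", type=lambda x: bool(strtobool(x)),
                    default=True, nargs="?", const=True, help="track on wandb")

print(parser.parse_args([]).wandb_track)                          # True (default)
print(parser.parse_args(["--wandb-track", "false"]).wandb_track)  # False
```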
@@ -101,16 +111,17 @@ class PPOAgent(nn.Module):
super(PPOAgent, self).__init__()
self.discrete_size = env.unity_discrete_size
self.discrete_shape = list(env.unity_discrete_branches)
self.continuous_size = env.unity_continuous_size

self.network = nn.Sequential(
layer_init(nn.Linear(np.array(env.unity_observation_shape).prod(), 128)),
nn.Tanh(),
layer_init(nn.Linear(128, 128)),
layer_init(nn.Linear(np.array(env.unity_observation_shape).prod(), 256)),
nn.ReLU(),
layer_init(nn.Linear(128, 128)),
layer_init(nn.Linear(256, 128)),
nn.ReLU(),
)
self.dis_Actor = layer_init(nn.Linear(128, self.discrete_size), std=0.01)
self.actor_dis = layer_init(nn.Linear(128, self.discrete_size), std=0.01)
self.actor_mean = layer_init(nn.Linear(128, self.continuous_size), std=0.01)
self.actor_logstd = nn.Parameter(torch.zeros(1, self.continuous_size))
self.critic = layer_init(nn.Linear(128, 1), std=1)

def get_value(self, state: torch.Tensor):
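A quick shape check of the new shared 256-128 ReLU trunk and the three heads, with assumed observation and action sizes (the real ones come from the environment):

```python
import torch
import torch.nn as nn

obs_dim, discrete_size, continuous_size = 29, 8, 2  # assumed sizes, for illustration only

network = nn.Sequential(
    nn.Linear(obs_dim, 256), nn.ReLU(),
    nn.Linear(256, 128), nn.ReLU(),
)
actor_dis = nn.Linear(128, discrete_size)     # logits for all discrete branches, concatenated
actor_mean = nn.Linear(128, continuous_size)  # means of the Gaussian policy
critic = nn.Linear(128, 1)                    # state value

state = torch.randn(4, obs_dim)  # batch of 4 observations
hidden = network(state)
print(actor_dis(hidden).shape, actor_mean(hidden).shape, critic(hidden).shape)
# torch.Size([4, 8]) torch.Size([4, 2]) torch.Size([4, 1])
```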
@@ -118,16 +129,35 @@ class PPOAgent(nn.Module):

def get_actions_value(self, state: torch.Tensor, actions=None):
hidden = self.network(state)
dis_logits = self.dis_Actor(hidden)
# discrete
dis_logits = self.actor_dis(hidden)
split_logits = torch.split(dis_logits, self.discrete_shape, dim=1)
multi_categoricals = [Categorical(logits=thisLogits) for thisLogits in split_logits]
# continuous
actions_mean = self.actor_mean(hidden)
action_logstd = self.actor_logstd.expand_as(actions_mean)
action_std = torch.exp(action_logstd)
con_probs = Normal(actions_mean, action_std)

if actions is None:
actions = torch.stack([ctgr.sample() for ctgr in multi_categoricals])
log_prob = torch.stack(
[ctgr.log_prob(act) for act, ctgr in zip(actions, multi_categoricals)]
disAct = torch.stack([ctgr.sample() for ctgr in multi_categoricals])
conAct = con_probs.sample()
actions = torch.cat([disAct.T, conAct], dim=1)
else:
disAct = actions[:, 0 : env.unity_discrete_type].T
conAct = actions[:, env.unity_discrete_type :]
dis_log_prob = torch.stack(
[ctgr.log_prob(act) for act, ctgr in zip(disAct, multi_categoricals)]
)
dis_entropy = torch.stack([ctgr.entropy() for ctgr in multi_categoricals])
return (
actions,
dis_log_prob.sum(0),
dis_entropy.sum(0),
con_probs.log_prob(conAct).sum(1),
con_probs.entropy().sum(1),
self.critic(hidden),
)
entropy = torch.stack([ctgr.entropy() for ctgr in multi_categoricals])
return actions.T, log_prob.sum(0), entropy.sum(0), self.critic(hidden)


if __name__ == "__main__":
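In isolation, the hybrid sampling above combines one Categorical per discrete branch with an independent Normal over the continuous axes, then concatenates the samples into a single action row per agent. A self-contained sketch with assumed branch sizes and random logits:

```python
import torch
from torch.distributions import Categorical, Normal

discrete_shape = [3, 3, 2]  # assumed branch sizes
batch = 4

dis_logits = torch.randn(batch, sum(discrete_shape))
split_logits = torch.split(dis_logits, discrete_shape, dim=1)
multi_categoricals = [Categorical(logits=l) for l in split_logits]

con_mean = torch.zeros(batch, 2)
con_probs = Normal(con_mean, torch.ones_like(con_mean))

dis_act = torch.stack([c.sample() for c in multi_categoricals])  # (branches, batch)
con_act = con_probs.sample()                                     # (batch, 2)
# cast to float before concatenating discrete indices with continuous values
actions = torch.cat([dis_act.T.float(), con_act], dim=1)         # (batch, 3 + 2)

dis_log_prob = torch.stack(
    [c.log_prob(a) for a, c in zip(dis_act, multi_categoricals)]
).sum(0)                                           # joint log-prob over the three branches
con_log_prob = con_probs.log_prob(con_act).sum(1)  # joint log-prob over the two axes
print(actions.shape, dis_log_prob.shape, con_log_prob.shape)
# torch.Size([4, 5]) torch.Size([4]) torch.Size([4])
```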
@@ -140,12 +170,19 @@ if __name__ == "__main__":

# Initialize environment, agent and optimizer
env = Aimbot(envPath=args.path, workerID=args.workerID, basePort=args.baseport)
if args.load_dir is None:
agent = PPOAgent(env).to(device)
else:
agent = torch.load(args.load_dir)
print("Load Agent", args.load_dir)
print(agent.eval())

optimizer = optim.Adam(agent.parameters(), lr=args.lr, eps=1e-5)

# Tensorboard and WandB Recorder
game_name = "Aimbot"
run_name = f"{game_name}__{args.seed}__{int(time.time())}"
if args.wandb_track:
wandb.init(
project=run_name,
entity=args.wandb_entity,
@@ -165,10 +202,9 @@ if __name__ == "__main__":

# Memory Record
obs = torch.zeros((args.stepNum, env.unity_agent_num) + env.unity_observation_shape).to(device)
actions = torch.zeros((args.stepNum, env.unity_agent_num) + (env.unity_discrete_type,)).to(
device
)
logprobs = torch.zeros((args.stepNum, env.unity_agent_num)).to(device)
actions = torch.zeros((args.stepNum, env.unity_agent_num) + (env.unity_action_size,)).to(device)
dis_logprobs = torch.zeros((args.stepNum, env.unity_agent_num)).to(device)
con_logprobs = torch.zeros((args.stepNum, env.unity_agent_num)).to(device)
rewards = torch.zeros((args.stepNum, env.unity_agent_num)).to(device)
dones = torch.zeros((args.stepNum, env.unity_agent_num)).to(device)
values = torch.zeros((args.stepNum, env.unity_agent_num)).to(device)
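The buffers now store the full hybrid action vector plus separate discrete and continuous log-probs per transition; a small shape sketch with assumed sizes:

```python
import torch

step_num, agent_num = 256, 8       # assumed rollout length and agent count
obs_shape, action_size = (29,), 5  # assumed observation shape and 3 + 2 action size

obs = torch.zeros((step_num, agent_num) + obs_shape)
actions = torch.zeros((step_num, agent_num) + (action_size,))  # full hybrid action per agent
dis_logprobs = torch.zeros((step_num, agent_num))  # joint discrete log-prob
con_logprobs = torch.zeros((step_num, agent_num))  # joint continuous log-prob
print(obs.shape, actions.shape)  # torch.Size([256, 8, 29]) torch.Size([256, 8, 5])
```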
@@ -198,13 +234,14 @@ if __name__ == "__main__":

with torch.no_grad():
# predict actions
action, logprob, _, value = agent.get_actions_value(next_obs)
action, dis_logprob, _, con_logprob, _, value = agent.get_actions_value(next_obs)
value = value.flatten()
next_obs, reward, done = env.step(action.cpu().numpy())

# save memories
actions[step] = action
logprobs[step] = logprob
dis_logprobs[step] = dis_logprob
con_logprobs[step] = con_logprob
values[step] = value
rewards[step] = torch.tensor(reward).to(device).view(-1)
next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device)
@@ -241,15 +278,16 @@ if __name__ == "__main__":

# flatten the batch
b_obs = obs.reshape((-1,) + env.unity_observation_shape)
b_logprobs = logprobs.reshape(-1)
b_actions = actions.reshape((-1,) + (env.unity_discrete_type,))
b_dis_logprobs = dis_logprobs.reshape(-1)
b_con_logprobs = con_logprobs.reshape(-1)
b_actions = actions.reshape((-1,) + (env.unity_action_size,))
b_advantages = advantages.reshape(-1)
b_returns = returns.reshape(-1)
b_values = values.reshape(-1)

# Optimizing the policy and value network
b_inds = np.arange(args.batch_size)
clipfracs = []
#clipfracs = []
for epoch in range(args.epochs):
# shuffle all datasets
np.random.shuffle(b_inds)
@@ -264,26 +302,42 @@ if __name__ == "__main__":
mb_advantages.std() + 1e-8
)

# ratio
_, newlogprob, entropy, newvalue = agent.get_actions_value(
b_obs[mb_inds], b_actions.long()[mb_inds].T
)
logratio = newlogprob - b_logprobs[mb_inds]
ratio = logratio.exp()
(
_,
new_dis_logprob,
dis_entropy,
new_con_logprob,
con_entropy,
newvalue,
) = agent.get_actions_value(b_obs[mb_inds], b_actions[mb_inds])
# discrete ratio
dis_logratio = new_dis_logprob - b_dis_logprobs[mb_inds]
dis_ratio = dis_logratio.exp()
# continuous ratio
con_logratio = new_con_logprob - b_con_logprobs[mb_inds]
con_ratio = con_logratio.exp()

"""
# early stop
with torch.no_grad():
# calculate approx_kl http://joschu.net/blog/kl-approx.html
old_approx_kl = (-logratio).mean()
approx_kl = ((ratio - 1) - logratio).mean()
clipfracs += [((ratio - 1.0).abs() > args.clip_coef).float().mean().item()]
"""

# Policy loss
pg_loss1 = -mb_advantages * ratio
pg_loss2 = -mb_advantages * torch.clamp(
ratio, 1 - args.clip_coef, 1 + args.clip_coef
# discrete Policy loss
dis_pg_loss_orig = -mb_advantages * dis_ratio
dis_pg_loss_clip = -mb_advantages * torch.clamp(
dis_ratio, 1 - args.clip_coef, 1 + args.clip_coef
)
pg_loss = torch.max(pg_loss1, pg_loss2).mean()
dis_pg_loss = torch.max(dis_pg_loss_orig, dis_pg_loss_clip).mean()
# continuous Policy loss
con_pg_loss_orig = -mb_advantages * con_ratio
con_pg_loss_clip = -mb_advantages * torch.clamp(
con_ratio, 1 - args.clip_coef, 1 + args.clip_coef
)
con_pg_loss = torch.max(con_pg_loss_orig, con_pg_loss_clip).mean()

# Value loss
newvalue = newvalue.view(-1)
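Both branches use the standard PPO clipped surrogate. A tiny worked example with made-up advantages and ratios shows how taking the element-wise max of the two negated terms caps the update once the ratio leaves [1 - clip_coef, 1 + clip_coef]:

```python
import torch

clip_coef = 0.1
advantages = torch.tensor([1.0, -1.0])  # made-up advantages
ratio = torch.tensor([1.3, 0.7])        # made-up probability ratios, both outside the clip range

loss_orig = -advantages * ratio
loss_clip = -advantages * torch.clamp(ratio, 1 - clip_coef, 1 + clip_coef)
pg_loss = torch.max(loss_orig, loss_clip).mean()

print(loss_orig, loss_clip, pg_loss)
# tensor([-1.3000,  0.7000]) tensor([-1.1000,  0.9000]) tensor(-0.1000)
```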
@@ -300,8 +354,14 @@ if __name__ == "__main__":
else:
v_loss = 0.5 * ((newvalue - b_returns[mb_inds]) ** 2).mean()

entropy_loss = entropy.mean()
loss = pg_loss - args.ent_coef * entropy_loss + v_loss * args.critic_coef
# total loss
entropy_loss = dis_entropy.mean() + con_entropy.mean()
loss = (
dis_pg_loss
+ con_pg_loss
- entropy_loss * args.ent_coef
+ v_loss * args.critic_coef
)

optimizer.zero_grad()
loss.backward()
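The combined objective adds the two surrogate losses, subtracts the summed entropy bonus, and adds the weighted value loss. Schematically, with made-up scalar values and assumed coefficients:

```python
# Schematic only; in the real loop these are the tensors computed above.
ent_coef, critic_coef = 0.01, 0.5      # assumed coefficient values
dis_pg_loss, con_pg_loss = 0.02, 0.03  # made-up scalar losses
dis_entropy, con_entropy, v_loss = 1.5, 1.2, 0.4

entropy_loss = dis_entropy + con_entropy
loss = dis_pg_loss + con_pg_loss - entropy_loss * ent_coef + v_loss * critic_coef
print(loss)  # 0.02 + 0.03 - 2.7*0.01 + 0.4*0.5 ≈ 0.223
```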
@@ -309,19 +369,26 @@ if __name__ == "__main__":
nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm)
optimizer.step()

"""
if args.target_kl is not None:
if approx_kl > args.target_kl:
break
"""
# record rewards for plotting purposes
writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step)
writer.add_scalar("losses/value_loss", v_loss.item(), global_step)
writer.add_scalar("losses/policy_loss", pg_loss.item(), global_step)
writer.add_scalar("losses/entropy", entropy_loss.item(), global_step)
writer.add_scalar("losses/old_approx_kl", old_approx_kl.item(), global_step)
writer.add_scalar("losses/approx_kl", approx_kl.item(), global_step)
writer.add_scalar("losses/clipfrac", np.mean(clipfracs), global_step)
writer.add_scalar("losses/dis_policy_loss", dis_pg_loss.item(), global_step)
writer.add_scalar("losses/con_policy_loss", con_pg_loss.item(), global_step)
writer.add_scalar("losses/total_loss", loss.item(), global_step)
writer.add_scalar("losses/entropy_loss", entropy_loss.item(), global_step)
# writer.add_scalar("losses/old_approx_kl", old_approx_kl.item(), global_step)
# writer.add_scalar("losses/approx_kl", approx_kl.item(), global_step)
#writer.add_scalar("losses/clipfrac", np.mean(clipfracs), global_step)
print("SPS:", int(global_step / (time.time() - start_time)))
writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step)
writer.add_scalar(
"charts/Reward", np.mean(rewards.to("cpu").detach().numpy().copy()), global_step
)

env.close()
writer.close()
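The renamed scalars go to the same TensorBoard SummaryWriter; a minimal logging sketch with placeholder values and a hypothetical log directory:

```python
import numpy as np
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter("runs/demo")  # hypothetical log directory
global_step = 1

# Same scalar tags as the diff, with placeholder values.
writer.add_scalar("losses/dis_policy_loss", 0.02, global_step)
writer.add_scalar("losses/con_policy_loss", 0.03, global_step)
writer.add_scalar("losses/total_loss", 0.223, global_step)
writer.add_scalar("charts/Reward", np.mean([0.5, 1.0, -0.2]), global_step)
writer.close()
```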
@@ -303,6 +303,134 @@
"(128, 4) + env.unity_observation_shape\n",
"env.reset()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([[1, 2, 3],\n",
" [1, 2, 3],\n",
" [1, 2, 3],\n",
" [1, 2, 3]], device='cuda:0')\n",
"tensor([[1],\n",
" [2],\n",
" [3],\n",
" [4]], device='cuda:0')\n"
]
},
{
"data": {
"text/plain": [
"tensor([[1, 2, 3, 1],\n",
" [1, 2, 3, 2],\n",
" [1, 2, 3, 3],\n",
" [1, 2, 3, 4]], device='cuda:0')"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import torch\n",
"aa = torch.tensor([[1,2,3],[1,2,3],[1,2,3],[1,2,3]]).to(\"cuda:0\")\n",
"bb = torch.tensor([[1],[2],[3],[4]]).to(\"cuda:0\")\n",
"print(aa)\n",
"print(bb)\n",
"torch.cat([aa,bb],axis = 1)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "Can't get attribute 'PPOAgent' on <module '__main__'>",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_31348\\1930153251.py\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mtorch\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mmymodel\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"../PPO-Model/SmallArea-256-128-hybrid.pt\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[0mmymodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0meval\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\torch\\serialization.py\u001b[0m in \u001b[0;36mload\u001b[1;34m(f, map_location, pickle_module, **pickle_load_args)\u001b[0m\n\u001b[0;32m 710\u001b[0m \u001b[0mopened_file\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mseek\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0morig_position\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 711\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mopened_file\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 712\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0m_load\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mopened_zipfile\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpickle_module\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mpickle_load_args\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 713\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0m_legacy_load\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mopened_file\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpickle_module\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mpickle_load_args\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 714\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\torch\\serialization.py\u001b[0m in \u001b[0;36m_load\u001b[1;34m(zip_file, map_location, pickle_module, pickle_file, **pickle_load_args)\u001b[0m\n\u001b[0;32m 1047\u001b[0m \u001b[0munpickler\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mUnpicklerWrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata_file\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mpickle_load_args\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1048\u001b[0m \u001b[0munpickler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpersistent_load\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpersistent_load\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1049\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0munpickler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1050\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1051\u001b[0m \u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_utils\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_validate_loaded_sparse_tensors\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\torch\\serialization.py\u001b[0m in \u001b[0;36mfind_class\u001b[1;34m(self, mod_name, name)\u001b[0m\n\u001b[0;32m 1040\u001b[0m \u001b[1;32mpass\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1041\u001b[0m \u001b[0mmod_name\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mload_module_mapping\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmod_name\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmod_name\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1042\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfind_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmod_name\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1043\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1044\u001b[0m \u001b[1;31m# Load the data (which may in turn use `persistent_load` to load tensors)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mAttributeError\u001b[0m: Can't get attribute 'PPOAgent' on <module '__main__'>"
]
}
],
"source": [
"import torch\n",
"\n",
"def layer_init(layer, std=np.sqrt(2), bias_const=0.0):\n",
" torch.nn.init.orthogonal_(layer.weight, std)\n",
" torch.nn.init.constant_(layer.bias, bias_const)\n",
" return layer\n",
"\n",
"class PPOAgent(nn.Module):\n",
" def __init__(self, env: Aimbot):\n",
" super(PPOAgent, self).__init__()\n",
" self.discrete_size = env.unity_discrete_size\n",
" self.discrete_shape = list(env.unity_discrete_branches)\n",
" self.continuous_size = env.unity_continuous_size\n",
"\n",
" self.network = nn.Sequential(\n",
" layer_init(nn.Linear(np.array(env.unity_observation_shape).prod(), 256)),\n",
" nn.ReLU(),\n",
" layer_init(nn.Linear(256, 128)),\n",
" nn.ReLU(),\n",
" )\n",
" self.actor_dis = layer_init(nn.Linear(128, self.discrete_size), std=0.01)\n",
" self.actor_mean = layer_init(nn.Linear(128, self.continuous_size), std=0.01)\n",
" self.actor_logstd = nn.Parameter(torch.zeros(1, self.continuous_size))\n",
" self.critic = layer_init(nn.Linear(128, 1), std=1)\n",
"\n",
" def get_value(self, state: torch.Tensor):\n",
" return self.critic(self.network(state))\n",
"\n",
" def get_actions_value(self, state: torch.Tensor, actions=None):\n",
" hidden = self.network(state)\n",
" # discrete\n",
" dis_logits = self.actor_dis(hidden)\n",
" split_logits = torch.split(dis_logits, self.discrete_shape, dim=1)\n",
" multi_categoricals = [Categorical(logits=thisLogits) for thisLogits in split_logits]\n",
" # continuous\n",
" actions_mean = self.actor_mean(hidden)\n",
" action_logstd = self.actor_logstd.expand_as(actions_mean)\n",
" action_std = torch.exp(action_logstd)\n",
" con_probs = Normal(actions_mean, action_std)\n",
"\n",
" if actions is None:\n",
" disAct = torch.stack([ctgr.sample() for ctgr in multi_categoricals])\n",
" conAct = con_probs.sample()\n",
" actions = torch.cat([disAct.T, conAct], dim=1)\n",
" else:\n",
" disAct = actions[:, 0 : env.unity_discrete_type].T\n",
" conAct = actions[:, env.unity_discrete_type :]\n",
" dis_log_prob = torch.stack(\n",
" [ctgr.log_prob(act) for act, ctgr in zip(disAct, multi_categoricals)]\n",
" )\n",
" dis_entropy = torch.stack([ctgr.entropy() for ctgr in multi_categoricals])\n",
" return (\n",
" actions,\n",
" dis_log_prob.sum(0),\n",
" dis_entropy.sum(0),\n",
" con_probs.log_prob(conAct).sum(1),\n",
" con_probs.entropy().sum(1),\n",
" self.critic(hidden),\n",
" )\n",
"\n",
"\n",
"mymodel = torch.load(\"../PPO-Model/SmallArea-256-128-hybrid.pt\")\n",
"mymodel.eval()"
]
}
],
"metadata": {