Parallel Environment Discrete PPO finish

Parallel Environment Discrete PPO finish. Runnable.
Koha9 2022-10-30 04:13:14 +09:00
parent 742529ccd7
commit 7497ffcb0f
7 changed files with 1087 additions and 23 deletions

.gitignore vendored
View File

@@ -76,8 +76,11 @@ crashlytics-build.properties
/Aimbot-PPO-Python/.vscode/
/Aimbot-PPO-Python/.mypy_cache/
/Aimbot-PPO-Python/__pycache__/
/Aimbot-PPO-Python/Tensorflow/__pycache__/
/Aimbot-PPO-Python/Pytorch/__pycache__/
/Aimbot-PPO-Python/Backup/
/Aimbot-PPO-Python/Build-MultiScene-WithLoad/
/Aimbot-PPO-Python/Build-CloseEnemyCut/
/Aimbot-PPO-Python/Build-ParallelEnv/
/Aimbot-PPO-Python/PPO-Model/
/Aimbot-PPO-Python/GAIL-Expert-Data/

View File

@@ -0,0 +1,161 @@
from mlagents_envs.base_env import ActionTuple
from mlagents_envs.environment import UnityEnvironment
import numpy as np
from numpy import ndarray
class makeEnv(object):
def __init__(
self,
envPath: str,
workerID: int = 1,
basePort: int = 100,
stackSize: int = 1,
stackIntercal: int = 0,
):
self.env = UnityEnvironment(
file_name=envPath,
seed=1,
side_channels=[],
worker_id=workerID,
base_port=basePort,
)
self.env.reset()
# get environment specs
self.LOAD_DIR_SIZE_IN_STATE = 3
self.TRACKED_AGENT = -1
self.BEHA_SPECS = self.env.behavior_specs
self.BEHA_NAME = list(self.BEHA_SPECS)[0]
self.SPEC = self.BEHA_SPECS[self.BEHA_NAME]
self.OBSERVATION_SPECS = self.SPEC.observation_specs[0] # observation spec
self.ACTION_SPEC = self.SPEC.action_spec # action specs
self.DISCRETE_SIZE = self.ACTION_SPEC.discrete_size
self.DISCRETE_SHAPE = list(self.ACTION_SPEC.discrete_branches)
self.CONTINUOUS_SIZE = self.ACTION_SPEC.continuous_size
self.SINGLE_STATE_SIZE = self.OBSERVATION_SPECS.shape[0] - self.LOAD_DIR_SIZE_IN_STATE
self.STATE_SIZE = self.SINGLE_STATE_SIZE * stackSize
# stacked State
self.STACK_SIZE = stackSize
self.STATE_BUFFER_SIZE = stackSize + ((stackSize - 1) * stackIntercal)
self.STACK_INDEX = list(range(0, self.STATE_BUFFER_SIZE, stackIntercal + 1))
self.statesBuffer = np.array([[0.0] * self.SINGLE_STATE_SIZE] * self.STATE_BUFFER_SIZE)
print("√√√√√Enviroment Initialized Success√√√√√")
def step(
self,
actions: list,
behaviorName: str = None,
trackedAgent: int = None,
):
"""change ations list to ActionTuple then send it to enviroment
Args:
actions (list): PPO chooseAction output action list
behaviorName (ndarray, optional): behaviorName. Defaults to None.
trackedAgent (int, optional): trackedAgentID. Defaults to None.
Returns:
ndarray: nextState, reward, done, loadDir, saveNow
"""
# send the action to the environment
# returns nextState, reward, done
if self.DISCRETE_SIZE == 0:
# create empty discrete action
discreteActions = np.asarray([[0]])
else:
# create discrete action from actions list
discreteActions = np.asanyarray([actions[0 : self.DISCRETE_SIZE]])
if self.CONTINUOUS_SIZE == 0:
# create empty continuous action
continuousActions = np.asanyarray([[0.0]])
else:
# create continuous actions from actions list
continuousActions = np.asanyarray([actions[self.DISCRETE_SIZE :]])
if behaviorName is None:
behaviorName = self.BEHA_NAME
if trackedAgent is None:
trackedAgent = self.TRACKED_AGENT
# create actionTuple
thisActionTuple = ActionTuple(continuous=continuousActions, discrete=discreteActions)
# take action to env
self.env.set_actions(behavior_name=behaviorName, action=thisActionTuple)
self.env.step()
# get nextState & reward & done after this action
nextState, reward, done, loadDir, saveNow = self.getSteps(behaviorName, trackedAgent)
return nextState, reward, done, loadDir, saveNow
def getSteps(self, behaviorName=None, trackedAgent=None):
"""get enviroment now observations.
Include State, Reward, Done, LoadDir, SaveNow
Args:
behaviorName (_type_, optional): behaviorName. Defaults to None.
trackedAgent (_type_, optional): trackedAgent. Defaults to None.
Returns:
ndarray: nextState, reward, done, loadDir, saveNow
"""
# get nextState & reward & done
if behaviorName is None:
behaviorName = self.BEHA_NAME
decisionSteps, terminalSteps = self.env.get_steps(behaviorName)
if self.TRACKED_AGENT == -1 and len(decisionSteps) >= 1:
self.TRACKED_AGENT = decisionSteps.agent_id[0]
if trackedAgent is None:
trackedAgent = self.TRACKED_AGENT
if trackedAgent in decisionSteps:  # if the game has not ended, the environment state is stored in decision_steps
nextState = decisionSteps[trackedAgent].obs[0]
nextState = np.reshape(
nextState, [self.SINGLE_STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE]
)
saveNow = nextState[-1]
loadDir = nextState[-3:-1]
nextState = nextState[:-3]
reward = decisionSteps[trackedAgent].reward
done = False
if trackedAgent in terminalSteps:  # if the game has ended, the environment state is stored in terminal_steps
nextState = terminalSteps[trackedAgent].obs[0]
nextState = np.reshape(
nextState, [self.SINGLE_STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE]
)
saveNow = nextState[-1]
loadDir = nextState[-3:-1]
nextState = nextState[:-3]
reward = terminalSteps[trackedAgent].reward
done = True
# stack state
stackedStates = self.stackStates(nextState)
return stackedStates, reward, done, loadDir, saveNow
def reset(self):
"""reset enviroment and get observations
Returns:
ndarray: nextState, reward, done, loadDir, saveNow
"""
# reset buffer
self.statesBuffer = np.array([[0.0] * self.SINGLE_STATE_SIZE] * self.STATE_BUFFER_SIZE)
# reset env
self.env.reset()
nextState, reward, done, loadDir, saveNow = self.getSteps()
return nextState, reward, done, loadDir, saveNow
def stackStates(self, state):
# shift the buffer and append the newest state
self.statesBuffer[0:-1] = self.statesBuffer[1:]
self.statesBuffer[-1] = state
# return stacked states
return np.reshape(self.statesBuffer[self.STACK_INDEX], (self.STATE_SIZE))
def render(self):
"""render enviroment"""
self.env.render()
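For reference, a minimal usage sketch of this single-agent wrapper. The module name, build path and stacking parameters below are illustrative assumptions, not part of this commit:

from makeEnv import makeEnv  # hypothetical module name for the file above

# hypothetical build path; worker ID, port and stacking values are placeholders
env = makeEnv(envPath="../Build-CloseEnemyCut/Aimbot", workerID=1, basePort=100, stackSize=3, stackIntercal=1)
state, reward, done, loadDir, saveNow = env.reset()
for _ in range(10):
    # flat action list: discrete branch choices first, then continuous values
    actions = [0] * env.DISCRETE_SIZE + [0.0] * env.CONTINUOUS_SIZE
    state, reward, done, loadDir, saveNow = env.step(actions)
env.env.close()  # makeEnv defines no close(); close the underlying UnityEnvironment directly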

View File

@@ -0,0 +1,146 @@
import gym
import numpy as np
from numpy import ndarray
from mlagents_envs.base_env import ActionTuple
from mlagents_envs.environment import UnityEnvironment
class Aimbot(gym.Env):
def __init__(
self,
envPath: str,
workerID: int = 1,
basePort: int = 100,
):
super(Aimbot, self).__init__()
self.env = UnityEnvironment(
file_name=envPath,
seed=1,
side_channels=[],
worker_id=workerID,
base_port=basePort,
)
self.env.reset()
# all behavior_specs
self.unity_specs = self.env.behavior_specs
# environment behavior name
self.unity_beha_name = list(self.unity_specs)[0]
# environment behavior spec
self.unity_specs = self.unity_specs[self.unity_beha_name]
# environment observation_space
self.unity_obs_specs = self.unity_specs.observation_specs[0]
# environment action specs
self.unity_action_spec = self.unity_specs.action_spec
# environment sample observation
decisionSteps, _ = self.env.get_steps(self.unity_beha_name)
# OBSERVATION SPECS
# environment state shape. like tuple:(93,)
self.unity_observation_shape = self.unity_obs_specs.shape
# ACTION SPECS
# environment continuous action number. int
self.unity_continuous_size = self.unity_action_spec.continuous_size
# environment discrete action shapes. list (3,3,2)
self.unity_discrete_branches = self.unity_action_spec.discrete_branches
# number of discrete action branches. int, e.g. 3
self.unity_discrete_type = self.unity_action_spec.discrete_size
# total number of discrete actions across all branches. int, e.g. 3+3+2=8
self.unity_discrete_size = sum(self.unity_discrete_branches)
# AGENT SPECS
# all agents ID
self.unity_agent_IDS = decisionSteps.agent_id
# agents number
self.unity_agent_num = len(self.unity_agent_IDS)
def reset(self):
"""reset enviroment and get observations
Returns:
ndarray: nextState, reward, done, loadDir, saveNow
"""
# reset env
self.env.reset()
nextState, reward, done = self.getSteps()
return nextState, reward, done
# TODO:
# delete all stack state DONE
# getstep State disassembly function DONE
# delete agent selection function DONE
# self.step action wrapper function DONE
def step(
self,
actions: ndarray,
):
"""change ations list to ActionTuple then send it to enviroment
Args:
actions (ndarray): PPO chooseAction output action list.(agentNum,actionNum)
Returns:
ndarray: nextState, reward, done
"""
# send the actions to the environment
# returns nextStates, rewards, dones
if self.unity_discrete_size == 0:
# create empty discrete action
discreteActions = np.asarray([[0]])
else:
# create discrete action from actions list
discreteActions = actions[:, 0 : self.unity_discrete_size]
"""
if self.unity_continuous_size == 0:
# create empty continuous action
continuousActions = np.asanyarray([[0.0]])
else:
# create continuous actions from actions list
continuousActions = actions[:,self.unity_discrete_size :]
"""
continuousActions = np.asanyarray([[0.0], [0.0], [0.0], [0.0]])
# create actionTuple
thisActionTuple = ActionTuple(continuous=continuousActions, discrete=discreteActions)
# take action to env
self.env.set_actions(behavior_name=self.unity_beha_name, action=thisActionTuple)
self.env.step()
# get nextState & reward & done after this action
nextStates, rewards, dones = self.getSteps()
return nextStates, rewards, dones
def getSteps(self):
"""get enviroment now observations.
Include State, Reward, Done
Args:
Returns:
ndarray: nextState, reward, done
"""
# get nextState & reward & done
decisionSteps, terminalSteps = self.env.get_steps(self.unity_beha_name)
nextStates = []
dones = []
rewards = []
for thisAgentID in self.unity_agent_IDS:
# when an episode ends, the agent ID appears in both decisionSteps and terminalSteps.
# to avoid duplicated states and rewards,
# use the agentExist flag to mark agents that have already been handled.
agentExist = False
# game done
if thisAgentID in terminalSteps:
nextStates.append(terminalSteps[thisAgentID].obs[0])
dones.append(True)
rewards.append(terminalSteps[thisAgentID].reward)
agentExist = True
# game not over yet and agent not in terminalSteps
if (thisAgentID in decisionSteps) and (not agentExist):
nextStates.append(decisionSteps[thisAgentID].obs[0])
dones.append(False)
rewards.append(decisionSteps[thisAgentID].reward)
return np.asarray(nextStates), rewards, dones
def close(self):
self.env.close()
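A minimal driving sketch for this parallel wrapper, assuming it is importable as AimbotEnv (the module name used by the PPO training script below) and using the Aimbot-ParallelEnv build path that appears later in this commit. step() expects one row of discrete branch choices per agent:

import numpy as np
from AimbotEnv import Aimbot  # module name as used by the training script below

env = Aimbot(envPath="../Build-ParallelEnv/Aimbot-ParallelEnv", workerID=1, basePort=2002)
states, rewards, dones = env.reset()  # states has shape (unity_agent_num, obs_dim)
# one discrete index per branch and per agent; all-zero indices are always valid
actions = np.zeros((env.unity_agent_num, env.unity_discrete_type), dtype=np.int64)
states, rewards, dones = env.step(actions)
env.close()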

View File

@@ -0,0 +1,291 @@
import argparse
import time
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
from AimbotEnv import Aimbot
from torch.distributions.normal import Normal
from torch.distributions.categorical import Categorical
from distutils.util import strtobool
from torch.utils.tensorboard import SummaryWriter
DEFAULT_SEED = 9331
ENV_PATH = "../Build-ParallelEnv/Aimbot-ParallelEnv"
WORKER_ID = 1
BASE_PORT = 2002
LEARNING_RATE = 2e-3
GAMMA = 0.99
GAE_LAMBDA = 0.95
TOTAL_STEPS = 2000000
STEP_NUM = 128
MINIBATCH_NUM = 4
EPOCHS = 4
CLIP_COEF = 0.1
ENTROPY_COEF = 0.01
CRITIC_COEF = 0.5
ANNEAL_LEARNING_RATE = True
CLIP_VLOSS = True
NORM_ADV = True
def parse_args():
# fmt: off
parser = argparse.ArgumentParser()
parser.add_argument("--seed", type=int, default=DEFAULT_SEED,
help="seed of the experiment")
parser.add_argument("--path", type=str, default=ENV_PATH,
help="enviroment path")
parser.add_argument("--workerID", type=int, default=WORKER_ID,
help="unity worker ID")
parser.add_argument("--baseport", type=int, default=BASE_PORT,
help="port to connect to Unity environment")
parser.add_argument("--lr", type=float, default=LEARNING_RATE,
help="the learning rate of optimizer")
parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
help="if toggled, cuda will be enabled by default")
parser.add_argument("--total-timesteps", type=int, default=TOTAL_STEPS,
help="total timesteps of the experiments")
parser.add_argument("--stepNum", type=int, default=STEP_NUM,
help="the number of steps to run in each environment per policy rollout")
parser.add_argument("--minibatchesNum", type=int, default=MINIBATCH_NUM,
help="the number of mini-batches")
parser.add_argument("--epochs", type=int, default=EPOCHS,
help="the K epochs to update the policy")
parser.add_argument("--annealLR", type=lambda x: bool(strtobool(x)), default=ANNEAL_LEARNING_RATE, nargs="?", const=True,
help="Toggle learning rate annealing for policy and value networks")
# GAE
parser.add_argument("--gae", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
help="Use GAE for advantage computation")
parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=NORM_ADV, nargs="?", const=True,
help="Toggles advantages normalization")
parser.add_argument("--gamma", type=float, default=GAMMA,
help="the discount factor gamma")
parser.add_argument("--gaeLambda", type=float, default=GAE_LAMBDA,
help="the lambda for the general advantage estimation")
parser.add_argument("--clip-coef", type=float, default=CLIP_COEF,
help="the surrogate clipping coefficient")
parser.add_argument("--ent-coef", type=float, default=ENTROPY_COEF,
help="coefficient of the entropy")
parser.add_argument("--critic-coef", type=float, default=CRITIC_COEF,
help="coefficient of the value function")
parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=CLIP_VLOSS, nargs="?", const=True,
help="Toggles whether or not to use a clipped loss for the value function, as per the paper.")
parser.add_argument("--max-grad-norm", type=float, default=0.5,
help="the maximum norm for the gradient clipping")
parser.add_argument("--target-kl", type=float, default=None,
help="the target KL divergence threshold")
# fmt: on
args = parser.parse_args()
return args
def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
torch.nn.init.orthogonal_(layer.weight, std)
torch.nn.init.constant_(layer.bias, bias_const)
return layer
class PPOAgent(nn.Module):
def __init__(self, env: Aimbot):
super(PPOAgent, self).__init__()
self.discrete_size = env.unity_discrete_size
self.discrete_shape = list(env.unity_discrete_branches)
self.network = nn.Sequential(
layer_init(nn.Linear(np.array(env.unity_observation_shape).prod(), 128)),
nn.Tanh(),
layer_init(nn.Linear(128, 128)),
nn.ReLU(),
layer_init(nn.Linear(128, 128)),
nn.ReLU(),
)
self.dis_Actor = layer_init(nn.Linear(128, self.discrete_size), std=0.01)
self.critic = layer_init(nn.Linear(128, 1), std=1)
def get_value(self, state: torch.Tensor):
return self.critic(self.network(state))
def get_actions_value(self, state: torch.Tensor, actions=None):
hidden = self.network(state)
dis_logits = self.dis_Actor(hidden)
split_logits = torch.split(dis_logits, self.discrete_shape, dim=1)
multi_categoricals = [Categorical(logits=thisLogits) for thisLogits in split_logits]
if actions is None:
actions = torch.stack([ctgr.sample() for ctgr in multi_categoricals])
log_prob = torch.stack(
[ctgr.log_prob(act) for act, ctgr in zip(actions, multi_categoricals)]
)
entropy = torch.stack([ctgr.entropy() for ctgr in multi_categoricals])
return actions.T, log_prob.sum(0), entropy.sum(0), self.critic(hidden)
if __name__ == "__main__":
args = parse_args()
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu")
# Initialize environment, agent and optimizer
env = Aimbot(envPath=args.path, workerID=args.workerID, basePort=args.baseport)
agent = PPOAgent(env).to(device)
optimizer = optim.Adam(agent.parameters(), lr=args.lr, eps=1e-5)
# Memory Record
obs = torch.zeros((args.stepNum, env.unity_agent_num) + env.unity_observation_shape).to(device)
actions = torch.zeros((args.stepNum, env.unity_agent_num) + (env.unity_discrete_type,)).to(
device
)
logprobs = torch.zeros((args.stepNum, env.unity_agent_num)).to(device)
rewards = torch.zeros((args.stepNum, env.unity_agent_num)).to(device)
dones = torch.zeros((args.stepNum, env.unity_agent_num)).to(device)
values = torch.zeros((args.stepNum, env.unity_agent_num)).to(device)
# TRY NOT TO MODIFY: start the game
args.batch_size = int(env.unity_agent_num * args.stepNum)
args.minibatch_size = int(args.batch_size // args.minibatchesNum)
total_update_step = args.total_timesteps // args.batch_size
global_step = 0
start_time = time.time()
next_obs, _, _ = env.reset()
next_obs = torch.Tensor(next_obs).to(device)
next_done = torch.zeros(env.unity_agent_num).to(device)
for total_steps in range(total_update_step):
# linearly anneal the learning rate; it reaches 0 when total_steps == total_update_step
if args.annealLR:
frac = 1.0 - (total_steps - 1.0) / total_update_step
lrnow = frac * args.lr
optimizer.param_groups[0]["lr"] = lrnow
# MAIN LOOP: run agent in environment
for step in range(args.stepNum):
print(step)
global_step += 1 * env.unity_agent_num
obs[step] = next_obs
dones[step] = next_done
with torch.no_grad():
# predict actions
action, logprob, _, value = agent.get_actions_value(next_obs)
value = value.flatten()
next_obs, reward, done = env.step(action.cpu().numpy())
# save memories
actions[step] = action
logprobs[step] = logprob
values[step] = value
rewards[step] = torch.tensor(reward).to(device).view(-1)
next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(device)
# GAE
with torch.no_grad():
next_value = agent.get_value(next_obs).reshape(1, -1)
if args.gae:
advantages = torch.zeros_like(rewards).to(device)
lastgaelam = 0
for t in reversed(range(args.stepNum)):
if t == args.stepNum - 1:
nextnonterminal = 1.0 - next_done
nextvalues = next_value
else:
nextnonterminal = 1.0 - dones[t + 1]
nextvalues = values[t + 1]
delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t]
advantages[t] = lastgaelam = (
delta + args.gamma * args.gaeLambda * nextnonterminal * lastgaelam
)
returns = advantages + values
else:
returns = torch.zeros_like(rewards).to(device)
for t in reversed(range(args.stepNum)):
if t == args.stepNum - 1:
nextnonterminal = 1.0 - next_done
next_return = next_value
else:
nextnonterminal = 1.0 - dones[t + 1]
next_return = returns[t + 1]
returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return
advantages = returns - values
# flatten the batch
b_obs = obs.reshape((-1,) + env.unity_observation_shape)
b_logprobs = logprobs.reshape(-1)
b_actions = actions.reshape((-1,) + (env.unity_discrete_type,))
b_advantages = advantages.reshape(-1)
b_returns = returns.reshape(-1)
b_values = values.reshape(-1)
# Optimizing the policy and value network
b_inds = np.arange(args.batch_size)
clipfracs = []
for epoch in range(args.epochs):
# shuffle all datasets
np.random.shuffle(b_inds)
for start in range(0, args.batch_size, args.minibatch_size):
end = start + args.minibatch_size
mb_inds = b_inds[start:end]
mb_advantages = b_advantages[mb_inds]
# normalize advantages
if args.norm_adv:
mb_advantages = (mb_advantages - mb_advantages.mean()) / (
mb_advantages.std() + 1e-8
)
# ratio
_, newlogprob, entropy, newvalue = agent.get_actions_value(
b_obs[mb_inds], b_actions.long()[mb_inds].T
)
logratio = newlogprob - b_logprobs[mb_inds]
ratio = logratio.exp()
# early stop
with torch.no_grad():
# calculate approx_kl http://joschu.net/blog/kl-approx.html
old_approx_kl = (-logratio).mean()
approx_kl = ((ratio - 1) - logratio).mean()
clipfracs += [((ratio - 1.0).abs() > args.clip_coef).float().mean().item()]
# Policy loss
pg_loss1 = -mb_advantages * ratio
pg_loss2 = -mb_advantages * torch.clamp(
ratio, 1 - args.clip_coef, 1 + args.clip_coef
)
pg_loss = torch.max(pg_loss1, pg_loss2).mean()
# Value loss
newvalue = newvalue.view(-1)
if args.clip_vloss:
v_loss_unclipped = (newvalue - b_returns[mb_inds]) ** 2
v_clipped = b_values[mb_inds] + torch.clamp(
newvalue - b_values[mb_inds],
-args.clip_coef,
args.clip_coef,
)
v_loss_clipped = (v_clipped - b_returns[mb_inds]) ** 2
v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped)
v_loss = 0.5 * v_loss_max.mean()
else:
v_loss = 0.5 * ((newvalue - b_returns[mb_inds]) ** 2).mean()
entropy_loss = entropy.mean()
loss = pg_loss - args.ent_coef * entropy_loss + v_loss * args.critic_coef
optimizer.zero_grad()
loss.backward()
# Clips gradient norm of an iterable of parameters.
nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm)
optimizer.step()
if args.target_kl is not None:
if approx_kl > args.target_kl:
break
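For clarity, the GAE block above computes delta_t = r_t + gamma * V(s_{t+1}) * (1 - done_{t+1}) - V(s_t) and accumulates A_t = delta_t + gamma * lambda * (1 - done_{t+1}) * A_{t+1} backwards over the rollout. A standalone NumPy sketch of the same recursion (the function name and NumPy arrays are illustrative; the script itself operates on torch tensors):

import numpy as np

def compute_gae(rewards, values, dones, next_value, next_done, gamma=0.99, gae_lambda=0.95):
    """Generalized Advantage Estimation over a rollout of shape (stepNum, agentNum)."""
    step_num = rewards.shape[0]
    advantages = np.zeros_like(rewards)
    lastgaelam = 0.0
    for t in reversed(range(step_num)):
        if t == step_num - 1:
            # bootstrap from the value of the state after the rollout
            nextnonterminal = 1.0 - next_done
            nextvalues = next_value
        else:
            nextnonterminal = 1.0 - dones[t + 1]
            nextvalues = values[t + 1]
        delta = rewards[t] + gamma * nextvalues * nextnonterminal - values[t]
        lastgaelam = delta + gamma * gae_lambda * nextnonterminal * lastgaelam
        advantages[t] = lastgaelam
    returns = advantages + values
    return advantages, returns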

View File

@@ -0,0 +1,7 @@
from AimbotGym import Aimbot
ENV_PATH = "../Build-ParallelEnv/Aimbot-ParallelEnv"
WORKER_ID = 1
BASE_PORT = 2002
env = Aimbot(envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT)
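A hedged extension of this smoke test (not part of the commit), assuming this Aimbot class exposes the same unity_agent_num and unity_discrete_branches attributes as the parallel wrapper above: sample one random index per discrete branch for every agent and step a few times.

import numpy as np

states, rewards, dones = env.reset()
for _ in range(10):
    # one random choice per discrete branch, e.g. branches (3, 3, 2), for each agent
    actions = np.stack(
        [np.random.randint(0, n, size=env.unity_agent_num) for n in env.unity_discrete_branches],
        axis=1,
    )
    states, rewards, dones = env.step(actions)
env.close()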

View File

@@ -0,0 +1,453 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Action, 1 continuous ctrl 2.1\n",
"Action, 0 continuous ctrl -1.1\n"
]
}
],
"source": [
"import gym\n",
"from gym.spaces import Dict, Discrete, Box, Tuple\n",
"import numpy as np\n",
"\n",
"\n",
"class SampleGym(gym.Env):\n",
" def __init__(self, config={}):\n",
" self.config = config\n",
" self.action_space = Tuple((Discrete(2), Box(-10, 10, (2,))))\n",
" self.observation_space = Box(-10, 10, (2, 2))\n",
" self.p_done = config.get(\"p_done\", 0.1)\n",
"\n",
" def reset(self):\n",
" return self.observation_space.sample()\n",
"\n",
" def step(self, action):\n",
" chosen_action = action[0]\n",
" cnt_control = action[1][chosen_action]\n",
"\n",
" if chosen_action == 0:\n",
" reward = cnt_control\n",
" else:\n",
" reward = -cnt_control - 1\n",
"\n",
" print(f\"Action, {chosen_action} continuous ctrl {cnt_control}\")\n",
" return (\n",
" self.observation_space.sample(),\n",
" reward,\n",
" bool(np.random.choice([True, False], p=[self.p_done, 1.0 - self.p_done])),\n",
" {},\n",
" )\n",
"\n",
"\n",
"if __name__ == \"__main__\":\n",
" env = SampleGym()\n",
" env.reset()\n",
" env.step((1, [-1, 2.1])) # should say use action 1 with 2.1\n",
" env.step((0, [-1.1, 2.1])) # should say use action 0 with -1.1"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from mlagents_envs.environment import UnityEnvironment\n",
"from gym_unity.envs import UnityToGymWrapper\n",
"import numpy as np\n",
"\n",
"ENV_PATH = \"../Build-ParallelEnv/Aimbot-ParallelEnv\"\n",
"WORKER_ID = 1\n",
"BASE_PORT = 2002\n",
"\n",
"env = UnityEnvironment(\n",
" file_name=ENV_PATH,\n",
" seed=1,\n",
" side_channels=[],\n",
" worker_id=WORKER_ID,\n",
" base_port=BASE_PORT,\n",
")\n",
"\n",
"trackedAgent = 0\n",
"env.reset()\n",
"BEHA_SPECS = env.behavior_specs\n",
"BEHA_NAME = list(BEHA_SPECS)[0]\n",
"SPEC = BEHA_SPECS[BEHA_NAME]\n",
"print(SPEC)\n",
"\n",
"decisionSteps, terminalSteps = env.get_steps(BEHA_NAME)\n",
"\n",
"if trackedAgent in decisionSteps: # ゲーム終了していない場合、環境状態がdecision_stepsに保存される\n",
" nextState = decisionSteps[trackedAgent].obs[0]\n",
" reward = decisionSteps[trackedAgent].reward\n",
" done = False\n",
"if trackedAgent in terminalSteps: # ゲーム終了した場合、環境状態がterminal_stepsに保存される\n",
" nextState = terminalSteps[trackedAgent].obs[0]\n",
" reward = terminalSteps[trackedAgent].reward\n",
" done = True\n",
"print(decisionSteps.agent_id)\n",
"print(terminalSteps)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"decisionSteps.agent_id [1 2 5 7]\n",
"decisionSteps.agent_id_to_index {1: 0, 2: 1, 5: 2, 7: 3}\n",
"decisionSteps.reward [0. 0. 0. 0.]\n",
"decisionSteps.action_mask [array([[False, False, False],\n",
" [False, False, False],\n",
" [False, False, False],\n",
" [False, False, False]]), array([[False, False, False],\n",
" [False, False, False],\n",
" [False, False, False],\n",
" [False, False, False]]), array([[False, False],\n",
" [False, False],\n",
" [False, False],\n",
" [False, False]])]\n",
"decisionSteps.obs [ 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. -15.994009 1. -26.322788 1.\n",
" 1. 1. 1. 1. 1. 2.\n",
" 1. 1. 1. 1. 1. 1.\n",
" 1. 1.3519633 1.6946528 2.3051548 3.673389 9.067246\n",
" 17.521473 21.727095 22.753294 24.167128 25.905216 18.35725\n",
" 21.02278 21.053417 0. ]\n"
]
},
{
"data": {
"text/plain": [
"'decisionSteps.obs [array([[-15.994009 , 1. , -26.322788 , 1. , 1. ,\\n 1. , 1. , 1. , 1. , 2. ,\\n 1. , 1. , 1. , 1. , 1. ,\\n 1. , 1. , 1.3519633, 1.6946528, 2.3051548,\\n 3.673389 , 9.067246 , 17.521473 , 21.727095 , 22.753294 ,\\n 24.167128 , 25.905216 , 18.35725 , 21.02278 , 21.053417 ,\\n 0. ],\\n [ -1.8809433, 1. , -25.66834 , 1. , 2. ,\\n 1. , 1. , 1. , 1. , 1. ,\\n 1. , 1. , 1. , 1. , 1. ,\\n 1. , 1. , 16.768637 , 23.414627 , 22.04486 ,\\n 21.050663 , 20.486784 , 20.486784 , 21.050665 , 15.049731 ,\\n 11.578419 , 9.695194 , 20.398016 , 20.368341 , 20.398016 ,\\n...\\n 20.551746 , 20.00118 , 20.001116 , 20.551594 , 21.5222 ,\\n 17.707508 , 14.86889 , 19.914494 , 19.885508 , 19.914463 ,\\n 0. ]], dtype=float32)]'"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(\"decisionSteps.agent_id\",decisionSteps.agent_id)\n",
"# decisionSteps.agent_id [1 2 5 7]\n",
"print(\"decisionSteps.agent_id_to_index\",decisionSteps.agent_id_to_index)\n",
"# decisionSteps.agent_id_to_index {1: 0, 2: 1, 5: 2, 7: 3}\n",
"print(\"decisionSteps.reward\",decisionSteps.reward)\n",
"# decisionSteps.reward [0. 0. 0. 0.]\n",
"print(\"decisionSteps.action_mask\",decisionSteps.action_mask)\n",
"'''\n",
"decisionSteps.action_mask [array([[False, False, False],\n",
" [False, False, False],\n",
" [False, False, False],\n",
" [False, False, False]]), array([[False, False, False],\n",
" [False, False, False],\n",
" [False, False, False],\n",
" [False, False, False]]), array([[False, False],\n",
" [False, False],\n",
" [False, False],\n",
" [False, False]])]\n",
"'''\n",
"print(\"decisionSteps.obs\", decisionSteps.obs[0][0])\n",
"'''decisionSteps.obs [array([[-15.994009 , 1. , -26.322788 , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 2. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.3519633, 1.6946528, 2.3051548,\n",
" 3.673389 , 9.067246 , 17.521473 , 21.727095 , 22.753294 ,\n",
" 24.167128 , 25.905216 , 18.35725 , 21.02278 , 21.053417 ,\n",
" 0. ],\n",
" [ -1.8809433, 1. , -25.66834 , 1. , 2. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 16.768637 , 23.414627 , 22.04486 ,\n",
" 21.050663 , 20.486784 , 20.486784 , 21.050665 , 15.049731 ,\n",
" 11.578419 , 9.695194 , 20.398016 , 20.368341 , 20.398016 ,\n",
"...\n",
" 20.551746 , 20.00118 , 20.001116 , 20.551594 , 21.5222 ,\n",
" 17.707508 , 14.86889 , 19.914494 , 19.885508 , 19.914463 ,\n",
" 0. ]], dtype=float32)]'''\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from AimbotEnv import Aimbot\n",
"\n",
"ENV_PATH = \"../Build-ParallelEnv/Aimbot-ParallelEnv\"\n",
"WORKER_ID = 1\n",
"BASE_PORT = 2002\n",
"\n",
"env = Aimbot(envPath=ENV_PATH,workerID= WORKER_ID,basePort= BASE_PORT)\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([[ 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , -15.994009 , 1. , -26.322788 , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 2. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.3519633, 1.6946528,\n",
" 2.3051548, 3.673389 , 9.067246 , 17.521473 , 21.727095 ,\n",
" 22.753294 , 24.167128 , 25.905216 , 18.35725 , 21.02278 ,\n",
" 21.053417 , 0. , -15.994003 , 1. , -26.322784 ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1.3519667,\n",
" 1.6946585, 2.3051722, 3.6734192, 9.067533 , 21.145092 ,\n",
" 21.727148 , 22.753365 , 24.167217 , 25.905317 , 18.358263 ,\n",
" 21.022812 , 21.053455 , 0. ],\n",
" [ 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , -1.8809433, 1. , -25.66834 , 1. ,\n",
" 2. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 16.768637 , 23.414627 ,\n",
" 22.04486 , 21.050663 , 20.486784 , 20.486784 , 21.050665 ,\n",
" 15.049731 , 11.578419 , 9.695194 , 20.398016 , 20.368341 ,\n",
" 20.398016 , 0. , -1.8809433, 1. , -25.66834 ,\n",
" 1. , 1. , 2. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 2. ,\n",
" 2. , 1. , 1. , 1. , 25.098585 ,\n",
" 15.749494 , 22.044899 , 21.050697 , 20.486813 , 20.486813 ,\n",
" 21.050694 , 15.049746 , 3.872317 , 3.789325 , 20.398046 ,\n",
" 20.368372 , 20.398046 , 0. ],\n",
" [ 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , -13.672583 , 1. , -26.479263 , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 5.3249803, 6.401276 ,\n",
" 8.374101 , 12.8657875, 21.302414 , 21.30242 , 21.888742 ,\n",
" 22.92251 , 24.346794 , 26.09773 , 21.210114 , 21.179258 ,\n",
" 21.210117 , 0. , -13.672583 , 1. , -26.479263 ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 2. , 1. , 1. ,\n",
" 2. , 1. , 1. , 2. , 5.3249855,\n",
" 6.4012837, 8.374114 , 12.865807 , 21.302446 , 21.30245 ,\n",
" 16.168503 , 22.922543 , 24.346823 , 7.1110754, 21.210148 ,\n",
" 21.17929 , 12.495141 , 0. ],\n",
" [ 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , -4.9038744, 1. , -25.185507 , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 20.33171 , 22.859762 ,\n",
" 21.522427 , 20.551746 , 20.00118 , 20.001116 , 20.551594 ,\n",
" 21.5222 , 17.707508 , 14.86889 , 19.914494 , 19.885508 ,\n",
" 19.914463 , 0. , -4.9038773, 1. , -25.185507 ,\n",
" 1. , 2. , 1. , 2. , 1. ,\n",
" 1. , 1. , 1. , 2. , 1. ,\n",
" 1. , 1. , 1. , 1. , 15.905993 ,\n",
" 22.85977 , 11.566693 , 20.551773 , 20.00121 , 20.001146 ,\n",
" 20.551619 , 7.135157 , 17.707582 , 14.868943 , 19.914528 ,\n",
" 19.88554 , 19.914494 , 0. ]], dtype=float32),\n",
" [[-0.05], [-0.05], [-0.05], [-0.05]],\n",
" [[False], [False], [False], [False]])"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"env.unity_observation_shape\n",
"(128, 4) + env.unity_observation_shape\n",
"env.reset()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0 0 0 0]\n",
" [0 0 0 0]\n",
" [0 0 0 0]\n",
" [0 0 0 0]]\n",
"[[0]\n",
" [0]\n",
" [0]\n",
" [0]]\n",
"[[0 0 0]\n",
" [0 0 0]\n",
" [0 0 0]\n",
" [0 0 0]]\n"
]
},
{
"data": {
"text/plain": [
"([array([ 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , -15.994009 , 1. , -26.322788 , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 2. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.3519633, 1.6946528,\n",
" 2.3051548, 3.673389 , 9.067246 , 17.521473 , 21.727095 ,\n",
" 22.753294 , 24.167128 , 25.905216 , 18.35725 , 21.02278 ,\n",
" 21.053417 , 0. , -15.994003 , 1. , -26.322784 ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 2. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1.3519667,\n",
" 1.6946585, 2.3051722, 3.6734192, 9.067533 , 17.521563 ,\n",
" 21.727148 , 22.753365 , 24.167217 , 25.905317 , 18.358263 ,\n",
" 21.022812 , 21.053455 , 0. ], dtype=float32),\n",
" array([ 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , -1.8809433, 1. , -25.66834 , 1. ,\n",
" 2. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 16.768637 , 23.414627 ,\n",
" 22.04486 , 21.050663 , 20.486784 , 20.486784 , 21.050665 ,\n",
" 15.049731 , 11.578419 , 9.695194 , 20.398016 , 20.368341 ,\n",
" 20.398016 , 0. , -1.8809433, 1. , -25.66834 ,\n",
" 1. , 2. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 16.768671 ,\n",
" 23.414669 , 22.044899 , 21.050697 , 20.486813 , 20.486813 ,\n",
" 21.050694 , 15.049746 , 11.578423 , 9.695195 , 20.398046 ,\n",
" 20.368372 , 20.398046 , 0. ], dtype=float32),\n",
" array([ 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , -13.672583 , 1. , -26.479263 , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 5.3249803, 6.401276 ,\n",
" 8.374101 , 12.8657875, 21.302414 , 21.30242 , 21.888742 ,\n",
" 22.92251 , 24.346794 , 26.09773 , 21.210114 , 21.179258 ,\n",
" 21.210117 , 0. , -13.672583 , 1. , -26.479263 ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 5.3249855,\n",
" 6.4012837, 8.374114 , 12.865807 , 21.302446 , 21.30245 ,\n",
" 21.888773 , 22.922543 , 24.346823 , 26.097757 , 21.210148 ,\n",
" 21.17929 , 21.21015 , 0. ], dtype=float32),\n",
" array([ 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , -4.9038744, 1. , -25.185507 , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 20.33171 , 22.859762 ,\n",
" 21.522427 , 20.551746 , 20.00118 , 20.001116 , 20.551594 ,\n",
" 21.5222 , 17.707508 , 14.86889 , 19.914494 , 19.885508 ,\n",
" 19.914463 , 0. , -4.9038773, 1. , -25.185507 ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 20.331783 ,\n",
" 22.85977 , 21.522448 , 20.551773 , 20.00121 , 20.001146 ,\n",
" 20.551619 , 21.522217 , 17.707582 , 14.868943 , 19.914528 ,\n",
" 19.88554 , 19.914494 , 0. ], dtype=float32)],\n",
" [[-0.05], [-0.05], [-0.05], [-0.05]],\n",
" [[False], [False], [False], [False]])"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"actions = np.zeros_like(np.arange(16).reshape(4, 4))\n",
"print(actions)\n",
"env.step(actions)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.7 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long