Corresponds to the V3.1.6 Play mode

Modifies the V3.1.5 Stay Target handling accordingly
Koha9 2023-07-29 23:52:20 +09:00
parent f9ee51c256
commit 15c1edb6c9
4 changed files with 71 additions and 61 deletions

View File

@@ -68,6 +68,9 @@ class Aimbot(gym.Env):
         # agents number
         self.unity_agent_num = len(self.unity_agent_IDS)
+        # all zero action
+        self.all_zero_action = np.zeros((self.unity_agent_num, self.unity_action_size))
+
     def reset(self) -> Tuple[np.ndarray, List, List]:
         """reset environment and get observations

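The new attribute pre-builds one no-op action per agent so the play loop can reuse it instead of allocating a fresh array every step. A minimal sketch of the shape involved; the 8 agents x 4 action components are illustrative values taken from the scratch cell in the notebook below, not read from the environment itself:

    import numpy as np

    # Illustrative sizes, standing in for self.unity_agent_num and
    # self.unity_action_size in Aimbot.__init__.
    unity_agent_num = 8
    unity_action_size = 4

    # One row of zeros per agent; stepping the env with this sends a
    # no-op ("stay") command to every agent, which is what
    # env.all_zero_action is used for later in the play loop.
    all_zero_action = np.zeros((unity_agent_num, unity_action_size))
    print(all_zero_action.shape)  # (8, 4)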
View File

@@ -181,31 +181,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mkoha9\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
-     ]
-    },
    {
     "data": {
      "text/plain": [
-      "True"
+      "array([[0., 0., 0., 0.],\n",
+      "       [0., 0., 0., 0.],\n",
+      "       [0., 0., 0., 0.],\n",
+      "       [0., 0., 0., 0.],\n",
+      "       [0., 0., 0., 0.],\n",
+      "       [0., 0., 0., 0.],\n",
+      "       [0., 0., 0., 0.],\n",
+      "       [0., 0., 0., 0.]])"
      ]
     },
-    "execution_count": 1,
+    "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-   "import wandb\n",
-   "wandb.login()"
+   "import numpy as np\n",
+   "np.zeros((8, 4))"
   ]
  }
 ],

View File

@@ -34,10 +34,11 @@ if __name__ == "__main__":
     # Initialize environment agent optimizer
     aimbot_side_channel = AimbotSideChannel(SIDE_CHANNEL_UUID)
-    env = Aimbot(env_path=args.path,
-                 worker_id=args.workerID,
-                 base_port=args.baseport,
-                 side_channels=[aimbot_side_channel])
+    env = Aimbot(
+        env_path=args.path,
+        worker_id=args.workerID,
+        base_port=args.baseport,
+        side_channels=[aimbot_side_channel])
     if args.load_dir is None:
         agent = PPOAgent(
             env=env,
@@ -104,7 +105,12 @@ if __name__ == "__main__":
     last_reward = [0. for i in range(env.unity_agent_num)]
     # MAIN LOOP: run agent in environment
     while True:
-        # On decision point, choose action by agent
+        # Target Type(state[0][0]) is stay(4),use all zero action
+        if state[0][0] == 4:
+            next_state, reward, next_done = env.step(env.all_zero_action)
+            state, done = next_state, next_done
+            continue
+        # On decision point, and Target Type(state[0][0]) is not stay(4) choose action by agent
         if step % args.decision_period == 0:
             step += 1
             # Choose action by agent
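The new branch skips the policy entirely whenever the current target is the "stay" type and feeds the pre-built all-zero action to Unity instead. A minimal sketch of that decision, assuming (as the diff's comment states) that state[0][0] carries the target-type code; choose_env_action is a hypothetical helper, the real loop inlines the check:

    STAY_TARGET = 4  # target-type code the new comment calls "stay"

    def choose_env_action(env, agent_action, state):
        """Pick the action actually sent to Unity this step (sketch only).

        When the target type is STAY_TARGET, the pre-built all-zero
        action replaces whatever the policy proposed, so every agent
        holds still; otherwise the agent's action passes through.
        """
        if state[0][0] == STAY_TARGET:
            return env.all_zero_action
        return agent_action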
@@ -124,52 +130,54 @@
             next_state, reward, next_done = env.step(action_cpu)
             # save memories
-            ppo_memories.save_memories(
-                now_step=step,
-                agent=agent,
-                state=state,
-                action_cpu=action_cpu,
-                dis_logprob_cpu=dis_logprob_cpu,
-                con_logprob_cpu=con_logprob_cpu,
-                reward=reward,
-                done=done,
-                value_cpu=value_cpu,
-                last_reward=last_reward,
-                next_done=next_done,
-                next_state=next_state,
-            )
-            # check if any training dataset is full and ready to train
-            for i in range(args.target_num):
-                if ppo_memories.obs[i].size()[0] >= args.datasetSize:
-                    # start train NN
-                    train_queue.append(i)
-            if len(train_queue) > 0:
-                # break while loop and start train
-                break
+            if args.train:
+                ppo_memories.save_memories(
+                    now_step=step,
+                    agent=agent,
+                    state=state,
+                    action_cpu=action_cpu,
+                    dis_logprob_cpu=dis_logprob_cpu,
+                    con_logprob_cpu=con_logprob_cpu,
+                    reward=reward,
+                    done=done,
+                    value_cpu=value_cpu,
+                    last_reward=last_reward,
+                    next_done=next_done,
+                    next_state=next_state,
+                )
+                # check if any training dataset is full and ready to train
+                for i in range(args.target_num):
+                    if ppo_memories.obs[i].size()[0] >= args.datasetSize:
+                        # start train NN
+                        train_queue.append(i)
+                if len(train_queue) > 0:
+                    # break while loop and start train
+                    break
             # update state
             state, done = next_state, next_done
         else:
             step += 1
             # skip this step use last predict action
             next_state, reward, next_done = env.step(action_cpu)
             # save memories
-            ppo_memories.save_memories(
-                now_step=step,
-                agent=agent,
-                state=state,
-                action_cpu=action_cpu,
-                dis_logprob_cpu=dis_logprob_cpu,
-                con_logprob_cpu=con_logprob_cpu,
-                reward=reward,
-                done=done,
-                value_cpu=value_cpu,
-                last_reward=last_reward,
-                next_done=next_done,
-                next_state=next_state,
-            )
+            if args.train:
+                ppo_memories.save_memories(
+                    now_step=step,
+                    agent=agent,
+                    state=state,
+                    action_cpu=action_cpu,
+                    dis_logprob_cpu=dis_logprob_cpu,
+                    con_logprob_cpu=con_logprob_cpu,
+                    reward=reward,
+                    done=done,
+                    value_cpu=value_cpu,
+                    last_reward=last_reward,
+                    next_done=next_done,
+                    next_state=next_state,
+                )
             # update state
             state = next_state
             last_reward = reward
     if args.train:
         # train mode on

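Both branches of the decision loop now write to ppo_memories only when args.train is set, so a pure play run no longer accumulates rollout data it will never train on. A rough sketch of the same guard in isolation; RolloutBuffer and record_step below are stand-ins for illustration, not the project's classes:

    class RolloutBuffer:
        """Stand-in for the script's ppo_memories object (illustrative only)."""

        def __init__(self):
            self.transitions = []

        def save_memories(self, **transition):
            self.transitions.append(transition)


    def record_step(buffer, train, **transition):
        # Mirrors the new `if args.train:` guard: play/inference runs simply
        # drop the transition instead of growing a buffer they never consume.
        if train:
            buffer.save_memories(**transition)


    buffer = RolloutBuffer()
    record_step(buffer, train=False, reward=0.0)  # play mode: nothing stored
    record_step(buffer, train=True, reward=0.0)   # train mode: one transition stored
    assert len(buffer.transitions) == 1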
View File

@@ -34,9 +34,9 @@ BROADCASTREWARD = False
 ANNEAL_LEARNING_RATE = True
 CLIP_VLOSS = True
 NORM_ADV = False
-TRAIN = True
-SAVE_MODEL = True
-WANDB_TACK = True
+TRAIN = False
+SAVE_MODEL = False
+WANDB_TACK = False
 LOAD_DIR = None
 #LOAD_DIR = "../PPO-Model/PList_Go_LeakyReLU_9331_1677965178_bestGoto/PList_Go_LeakyReLU_9331_1677965178_10.709002.pt"
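With TRAIN, SAVE_MODEL, and WANDB_TACK all defaulting to False, the stock configuration now describes a play/evaluation run: no gradient updates, no checkpoints, no Weights & Biases logging. A small sketch of how such constants typically feed argparse defaults; the flag names and str2bool helper here are assumptions for illustration, not copied from the file:

    import argparse

    # Play-mode defaults mirroring the new constants in the diff.
    TRAIN = False
    SAVE_MODEL = False
    WANDB_TACK = False

    def str2bool(value):
        """Parse 'true'/'false'-style strings from the command line."""
        return str(value).lower() in ("1", "true", "yes")

    parser = argparse.ArgumentParser()
    parser.add_argument("--train", type=str2bool, default=TRAIN)
    parser.add_argument("--save-model", type=str2bool, default=SAVE_MODEL)
    parser.add_argument("--wandb-track", type=str2bool, default=WANDB_TACK)

    # No overrides: pure play, no checkpointing, no experiment tracking.
    args = parser.parse_args([])
    assert (args.train, args.save_model, args.wandb_track) == (False, False, False)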