Change learning timing
Change learning timing so that training happens at the end of each episode.
parent a0895c7449
commit 32d398dbef
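What the change does, in short: instead of filling fixed-size (stepNum, agent_num) rollout tensors, every agent keeps its own episode buffers; when an agent's episode ends, that episode's advantages and returns are computed and its buffers are flushed into one flat training dataset, and a PPO update starts once the dataset holds at least --datasetSize samples. Below is a minimal sketch of that collection pattern only; the toy environment and the names toy_step, AGENT_NUM, DATASET_SIZE are illustrative stand-ins, not the repo's code.

# Minimal sketch (not the repo's code) of episode-end data collection:
# per-agent buffers are flushed into one flat dataset when an agent's
# episode ends, and an update runs once the dataset is large enough.
import random

AGENT_NUM = 4
DATASET_SIZE = 32          # stand-in for --datasetSize / MAX_TRAINNING_DATASETS

def toy_step():
    """Return (reward, done) per agent; a stand-in for env.step()."""
    return [(random.random(), random.random() < 0.1) for _ in range(AGENT_NUM)]

reward_bf = [[] for _ in range(AGENT_NUM)]   # per-agent episode buffers
dataset = []                                 # flat training dataset

while True:
    results = toy_step()
    for i, (reward, done) in enumerate(results):
        reward_bf[i].append(reward)
        if done:
            # episode finished: move this agent's buffer into the dataset
            dataset.extend(reward_bf[i])
            reward_bf[i] = []
    if len(dataset) >= DATASET_SIZE:
        break                                # enough data collected: train now

print(f"collected {len(dataset)} transitions, ready to update")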
@@ -8,6 +8,7 @@ import torch.nn as nn
 import torch.optim as optim
 
 from AimbotEnv import Aimbot
+from tqdm import tqdm
 from torch.distributions.normal import Normal
 from torch.distributions.categorical import Categorical
 from distutils.util import strtobool
@@ -16,34 +17,34 @@ from torch.utils.tensorboard import SummaryWriter
 bestReward = 0
 
 DEFAULT_SEED = 9331
-ENV_PATH = "../Build/Build-ParallelEnv-BigArea-6Enemy/Aimbot-ParallelEnv"
+ENV_PATH = "../Build/Build-ParallelEnv-BigArea-6Enemy-EndBonus/Aimbot-ParallelEnv"
 WAND_ENTITY = "koha9"
 WORKER_ID = 1
 BASE_PORT = 1000
 
+# max round steps per agent is 2500, 25 seconds
 
-TOTAL_STEPS = 2000000
+TOTAL_STEPS = 4000000
-STEP_NUM = 314
+BATCH_SIZE = 512
+MAX_TRAINNING_DATASETS = 8000
 DECISION_PERIOD = 2
 LEARNING_RATE = 7e-4
 GAMMA = 0.99
 GAE_LAMBDA = 0.95
-MINIBATCH_NUM = 4
 EPOCHS = 4
 CLIP_COEF = 0.1
 POLICY_COEF = 1.0
 ENTROPY_COEF = 0.01
 CRITIC_COEF = 0.5
 
-ANNEAL_LEARNING_RATE = True
+ANNEAL_LEARNING_RATE = False
 CLIP_VLOSS = True
 NORM_ADV = True
-TRAIN = True
+TRAIN = False
 
 WANDB_TACK = False
 LOAD_DIR = None
-# LOAD_DIR = "../PPO-Model/SmallArea-256-128-hybrid-2nd-trainning.pt"
+LOAD_DIR = "../PPO-Model/bigArea-4.pt"
 
 
 def parse_args():
     # fmt: off
@@ -67,10 +68,10 @@ def parse_args():
     # model parameters
     parser.add_argument("--train",type=lambda x: bool(strtobool(x)), default=TRAIN, nargs="?", const=True,
         help="Train Model or not")
-    parser.add_argument("--stepNum", type=int, default=STEP_NUM,
+    parser.add_argument("--datasetSize", type=int, default=MAX_TRAINNING_DATASETS,
-        help="the number of steps to run in each environment per policy rollout")
+        help="training dataset size,start training while dataset collect enough data")
-    parser.add_argument("--minibatchesNum", type=int, default=MINIBATCH_NUM,
+    parser.add_argument("--minibatchSize", type=int, default=BATCH_SIZE,
-        help="the number of mini-batches")
+        help="nimi batch size")
     parser.add_argument("--epochs", type=int, default=EPOCHS,
         help="the K epochs to update the policy")
     parser.add_argument("--annealLR", type=lambda x: bool(strtobool(x)), default=ANNEAL_LEARNING_RATE, nargs="?", const=True,
@@ -179,6 +180,40 @@ class PPOAgent(nn.Module):
         )
 
 
+def GAE(agent, args, rewards, dones, values, next_obs, next_done):
+    # GAE
+    with torch.no_grad():
+        next_value = agent.get_value(next_obs).reshape(1, -1)
+        data_size = rewards.size()[0]
+        if args.gae:
+            advantages = torch.zeros_like(rewards).to(device)
+            lastgaelam = 0
+            for t in reversed(range(data_size)):
+                if t == data_size - 1:
+                    nextnonterminal = 1.0 - next_done
+                    nextvalues = next_value
+                else:
+                    nextnonterminal = 1.0 - dones[t + 1]
+                    nextvalues = values[t + 1]
+                delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t]
+                advantages[t] = lastgaelam = (
+                    delta + args.gamma * args.gaeLambda * nextnonterminal * lastgaelam
+                )
+            returns = advantages + values
+        else:
+            returns = torch.zeros_like(rewards).to(device)
+            for t in reversed(range(data_size)):
+                if t == data_size - 1:
+                    nextnonterminal = 1.0 - next_done
+                    next_return = next_value
+                else:
+                    nextnonterminal = 1.0 - dones[t + 1]
+                    next_return = returns[t + 1]
+                returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return
+            advantages = returns - values
+    return advantages, returns
+
+
 if __name__ == "__main__":
     args = parse_args()
     random.seed(args.seed)
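For reference, a self-contained toy run of the same advantage recursion the new GAE helper applies to one finished episode; gamma, gae_lambda and every reward/value number below are made up for illustration and are not the training config.

# Toy check of the GAE recursion above, on one finished 3-step episode.
import torch

gamma, gae_lambda = 0.99, 0.95
rewards = torch.tensor([1.0, 0.0, 2.0])
values = torch.tensor([0.5, 0.4, 0.3])
dones = torch.tensor([0.0, 0.0, 0.0])
next_value = torch.tensor([0.0])   # bootstrap value after the last step
next_done = torch.tensor([1.0])    # the episode ended here

advantages = torch.zeros_like(rewards)
lastgaelam = 0.0
for t in reversed(range(len(rewards))):
    if t == len(rewards) - 1:
        nextnonterminal = 1.0 - next_done
        nextvalues = next_value
    else:
        nextnonterminal = 1.0 - dones[t + 1]
        nextvalues = values[t + 1]
    delta = rewards[t] + gamma * nextvalues * nextnonterminal - values[t]
    advantages[t] = lastgaelam = delta + gamma * gae_lambda * nextnonterminal * lastgaelam

returns = advantages + values
print(advantages, returns)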
@@ -199,11 +234,11 @@ if __name__ == "__main__":
     optimizer = optim.Adam(agent.parameters(), lr=args.lr, eps=1e-5)
 
     # Tensorboard and WandB Recorder
-    game_name = "Aimbot"
+    game_name = "Aimbot-BigArea-6Enemy-EndBonus"
     run_name = f"{game_name}_{args.seed}_{int(time.time())}"
     if args.wandb_track:
         wandb.init(
-            project=run_name,
+            project=game_name,
             entity=args.wandb_entity,
             sync_tensorboard=True,
             config=vars(args),
@@ -219,94 +254,165 @@ if __name__ == "__main__":
         % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])),
     )
 
-    # Memory Record
-    obs = torch.zeros((args.stepNum, env.unity_agent_num) + env.unity_observation_shape).to(device)
-    actions = torch.zeros((args.stepNum, env.unity_agent_num) + (env.unity_action_size,)).to(device)
-    dis_logprobs = torch.zeros((args.stepNum, env.unity_agent_num)).to(device)
-    con_logprobs = torch.zeros((args.stepNum, env.unity_agent_num)).to(device)
-    rewards = torch.zeros((args.stepNum, env.unity_agent_num)).to(device)
-    dones = torch.zeros((args.stepNum, env.unity_agent_num)).to(device)
-    values = torch.zeros((args.stepNum, env.unity_agent_num)).to(device)
+    # Trajectory Buffer
+    ob_bf = [[] for i in range(env.unity_agent_num)]
+    act_bf = [[] for i in range(env.unity_agent_num)]
+    dis_logprobs_bf = [[] for i in range(env.unity_agent_num)]
+    con_logprobs_bf = [[] for i in range(env.unity_agent_num)]
+    rewards_bf = [[] for i in range(env.unity_agent_num)]
+    dones_bf = [[] for i in range(env.unity_agent_num)]
+    values_bf = [[] for i in range(env.unity_agent_num)]
 
     # TRY NOT TO MODIFY: start the game
-    args.batch_size = int(env.unity_agent_num * args.stepNum)
-    args.minibatch_size = int(args.batch_size // args.minibatchesNum)
-    total_update_step = args.total_timesteps // args.batch_size
+    total_update_step = args.total_timesteps // args.datasetSize
     global_step = 0
     start_time = time.time()
-    next_obs, _, _ = env.reset()
-    next_obs = torch.Tensor(next_obs).to(device)
-    next_done = torch.zeros(env.unity_agent_num).to(device)
+    state, _, done = env.reset()
+    # state = torch.Tensor(next_obs).to(device)
+    # next_done = torch.zeros(env.unity_agent_num).to(device)
 
     for total_steps in range(total_update_step):
         # discunt learning rate, while step == total_update_step lr will be 0
+        print("new episode")
         if args.annealLR:
             frac = 1.0 - (total_steps - 1.0) / total_update_step
             lrnow = frac * args.lr
             optimizer.param_groups[0]["lr"] = lrnow
 
+        # initialize empty training datasets
+        obs = torch.tensor([]).to(device)  # (n,env.unity_observation_size)
+        actions = torch.tensor([]).to(device)  # (n,env.unity_action_size)
+        dis_logprobs = torch.tensor([]).to(device)  # (n,1)
+        con_logprobs = torch.tensor([]).to(device)  # (n,1)
+        rewards = torch.tensor([]).to(device)  # (n,1)
+        values = torch.tensor([]).to(device)  # (n,1)
+        advantages = torch.tensor([]).to(device)  # (n,1)
+        returns = torch.tensor([]).to(device)  # (n,1)
 
         # MAIN LOOP: run agent in environment
-        for i in range(args.stepNum * args.decision_period):
+        i = 0
+        training = False
+        while True:
             if i % args.decision_period == 0:
                 step = round(i / args.decision_period)
                 # Choose action by agent
                 global_step += 1 * env.unity_agent_num
-                obs[step] = next_obs
-                dones[step] = next_done
 
                 with torch.no_grad():
                     # predict actions
                     action, dis_logprob, _, con_logprob, _, value = agent.get_actions_value(
-                        next_obs
+                        torch.Tensor(state).to(device)
                     )
                     value = value.flatten()
-                next_obs, reward, done = env.step(action.cpu().numpy())
+                # variable from GPU to CPU
+                action_cpu = action.cpu().numpy()
+                dis_logprob_cpu = dis_logprob.cpu().numpy()
+                con_logprob_cpu = con_logprob.cpu().numpy()
+                value_cpu = value.cpu().numpy()
+                # Environment step
+                next_state, reward, next_done = env.step(action_cpu)
 
                 # save memories
-                actions[step] = action
-                dis_logprobs[step] = dis_logprob
-                con_logprobs[step] = con_logprob
-                values[step] = value
-                rewards[step] = torch.tensor(reward).to(device).view(-1)
-                next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(
-                    device
-                )
+                for i in range(env.unity_agent_num):
+                    # save memories to buffers
+                    ob_bf[i].append(state[i])
+                    act_bf[i].append(action_cpu[i])
+                    dis_logprobs_bf[i].append(dis_logprob_cpu[i])
+                    con_logprobs_bf[i].append(con_logprob_cpu[i])
+                    rewards_bf[i].append(reward[i])
+                    dones_bf[i].append(done[i])
+                    values_bf[i].append(value_cpu[i])
+                    if next_done[i] == True:
+                        # finished a round, send finished memories to training datasets
+                        # compute advantage and discounted reward
+                        adv, rt = GAE(
+                            agent,
+                            args,
+                            torch.tensor(rewards_bf[i]).to(device),
+                            torch.Tensor(dones_bf[i]).to(device),
+                            torch.tensor(values_bf[i]).to(device),
+                            torch.tensor(next_state[i]).to(device),
+                            torch.Tensor([next_done[i]]).to(device),
+                        )
+                        # send memories to training datasets
+                        obs = torch.cat((obs, torch.tensor(ob_bf[i]).to(device)), 0)
+                        actions = torch.cat((actions, torch.tensor(act_bf[i]).to(device)), 0)
+                        dis_logprobs = torch.cat(
+                            (dis_logprobs, torch.tensor(dis_logprobs_bf[i]).to(device)), 0
+                        )
+                        con_logprobs = torch.cat(
+                            (con_logprobs, torch.tensor(con_logprobs_bf[i]).to(device)), 0
+                        )
+                        rewards = torch.cat((rewards, torch.tensor(rewards_bf[i]).to(device)), 0)
+                        values = torch.cat((values, torch.tensor(values_bf[i]).to(device)), 0)
+                        advantages = torch.cat((advantages, adv), 0)
+                        returns = torch.cat((returns, rt), 0)
+
+                        # clear buffers
+                        ob_bf[i] = []
+                        act_bf[i] = []
+                        dis_logprobs_bf[i] = []
+                        con_logprobs_bf[i] = []
+                        rewards_bf[i] = []
+                        dones_bf[i] = []
+                        values_bf[i] = []
+                        print(f"train dataset:{obs.size()[0]}/{args.datasetSize}")
+
+                if obs.size()[0] >= args.datasetSize:
+                    # start train NN
+                    break
+                state, done = next_state, next_done
             else:
                 # skip this step use last predict action
-                next_obs, reward, done = env.step(action.cpu().numpy())
-                next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(done).to(
-                    device
-                )
+                next_obs, reward, done = env.step(action_cpu)
+                # save memories
+                for i in range(env.unity_agent_num):
+                    if next_done[i] == True:
+                        # save last memories to buffers
+                        ob_bf[i].append(state[i])
+                        act_bf[i].append(action_cpu[i])
+                        dis_logprobs_bf[i].append(dis_logprob_cpu[i])
+                        con_logprobs_bf[i].append(con_logprob_cpu[i])
+                        rewards_bf[i].append(reward[i])
+                        dones_bf[i].append(done[i])
+                        values_bf[i].append(value_cpu[i])
+                        # finished a round, send finished memories to training datasets
+                        # compute advantage and discounted reward
+                        adv, rt = GAE(
+                            agent,
+                            args,
+                            torch.tensor(rewards_bf[i]).to(device),
+                            torch.Tensor(dones_bf[i]).to(device),
+                            torch.tensor(values_bf[i]).to(device),
+                            torch.tensor(next_state[i]).to(device),
+                            torch.Tensor([next_done[i]]).to(device),
+                        )
+                        # send memories to training datasets
+                        obs = torch.cat((obs, torch.tensor(ob_bf[i]).to(device)), 0)
+                        actions = torch.cat((actions, torch.tensor(act_bf[i]).to(device)), 0)
+                        dis_logprobs = torch.cat(
+                            (dis_logprobs, torch.tensor(dis_logprobs_bf[i]).to(device)), 0
+                        )
+                        con_logprobs = torch.cat(
+                            (con_logprobs, torch.tensor(con_logprobs_bf[i]).to(device)), 0
+                        )
+                        rewards = torch.cat((rewards, torch.tensor(rewards_bf[i]).to(device)), 0)
+                        values = torch.cat((values, torch.tensor(values_bf[i]).to(device)), 0)
+                        advantages = torch.cat((advantages, adv), 0)
+                        returns = torch.cat((returns, rt), 0)
 
-        # GAE
-        with torch.no_grad():
-            next_value = agent.get_value(next_obs).reshape(1, -1)
-            if args.gae:
-                advantages = torch.zeros_like(rewards).to(device)
-                lastgaelam = 0
-                for t in reversed(range(args.stepNum)):
-                    if t == args.stepNum - 1:
-                        nextnonterminal = 1.0 - next_done
-                        nextvalues = next_value
-                    else:
-                        nextnonterminal = 1.0 - dones[t + 1]
-                        nextvalues = values[t + 1]
-                    delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t]
-                    advantages[t] = lastgaelam = (
-                        delta + args.gamma * args.gaeLambda * nextnonterminal * lastgaelam
-                    )
-                returns = advantages + values
-            else:
-                returns = torch.zeros_like(rewards).to(device)
-                for t in reversed(range(args.stepNum)):
-                    if t == args.stepNum - 1:
-                        nextnonterminal = 1.0 - next_done
-                        next_return = next_value
-                    else:
-                        nextnonterminal = 1.0 - dones[t + 1]
-                        next_return = returns[t + 1]
-                    returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return
-                advantages = returns - values
+                        # clear buffers
+                        ob_bf[i] = []
+                        act_bf[i] = []
+                        dis_logprobs_bf[i] = []
+                        con_logprobs_bf[i] = []
+                        rewards_bf[i] = []
+                        dones_bf[i] = []
+                        values_bf[i] = []
+                        print(f"train dataset:{obs.size()[0]}/{args.datasetSize}")
+                state, done = next_state, next_done
+            i += 1
 
         if args.train:
             # flatten the batch
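The collection loop above grows the per-update dataset by concatenating each finished episode onto initially empty tensors. Below is a minimal sketch of that accumulation pattern; the 3-feature observation shape and the 2-step episode are made-up values, and an explicitly shaped empty tensor is used here to keep the sketch version-agnostic (the script itself starts from torch.tensor([])).

# Sketch of the torch.cat accumulation used in the new collection loop.
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

obs_dataset = torch.empty((0, 3)).to(device)          # empty flat dataset
episode_obs = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]      # one agent's finished episode buffer
obs_dataset = torch.cat((obs_dataset, torch.tensor(episode_obs).to(device)), 0)
print(obs_dataset.size())                             # torch.Size([2, 3])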
@@ -317,15 +423,15 @@ if __name__ == "__main__":
             b_advantages = advantages.reshape(-1)
             b_returns = returns.reshape(-1)
             b_values = values.reshape(-1)
+            b_size = b_obs.size()[0]
             # Optimizing the policy and value network
-            b_inds = np.arange(args.batch_size)
+            b_inds = np.arange(b_size)
             # clipfracs = []
             for epoch in range(args.epochs):
                 # shuffle all datasets
                 np.random.shuffle(b_inds)
-                for start in range(0, args.batch_size, args.minibatch_size):
+                for start in range(0, b_size, args.minibatchSize):
-                    end = start + args.minibatch_size
+                    end = start + args.minibatchSize
                     mb_inds = b_inds[start:end]
                     mb_advantages = b_advantages[mb_inds]
 
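Because the dataset length now varies from update to update, the minibatch loop walks shuffled indices in chunks of --minibatchSize instead of a fixed batch_size/minibatch_size split. A small sketch of that slicing with made-up sizes:

# Sketch of the variable-size minibatch split; 1000 and 512 are made-up values.
import numpy as np

b_size = 1000            # stands in for obs.size()[0] after collection
minibatch_size = 512     # stands in for --minibatchSize / BATCH_SIZE

b_inds = np.arange(b_size)
np.random.shuffle(b_inds)
for start in range(0, b_size, minibatch_size):
    end = start + minibatch_size
    mb_inds = b_inds[start:end]   # the final chunk may be shorter than minibatch_size
    print(start, end, len(mb_inds))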
@@ -434,41 +434,119 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import numpy as np\n",
+    "\n",
+    "x = torch.randn(2, 3).to(\"cuda\")\n",
+    "print(x)\n",
+    "print(torch.cat((x, x, x), 0))\n",
+    "print(torch.cat((x, x, x), 1))\n",
+    "\n",
+    "aa = torch.empty(0).to(\"cuda\")\n",
+    "torch.cat([aa,x])\n",
+    "bb = [[]]*2\n",
+    "print(bb)\n",
+    "bb.append(x.to(\"cpu\").tolist())\n",
+    "bb.append(x.to(\"cpu\").tolist())\n",
+    "print(bb)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "x : torch.Size([2, 3, 4])\n",
-      "x : torch.Size([6, 2, 3, 4])\n",
-      "x : torch.Size([6, 2, 3, 4])\n"
+      "tensor([[-1.1090,  0.4686,  0.6883],\n",
+      "        [-0.1862, -0.3943, -0.0202],\n",
+      "        [ 0.1436, -0.9444, -1.2079],\n",
+      "        [-2.9434, -2.5989, -0.6653],\n",
+      "        [ 0.4668,  0.8548, -0.4641],\n",
+      "        [-0.3956, -0.2832, -0.1889],\n",
+      "        [-0.2801, -0.2092,  1.7254],\n",
+      "        [ 2.7938, -0.7742,  0.7053]], device='cuda:0')\n",
+      "(8, 0)\n",
+      "---\n",
+      "[[array([-1.1090169, 0.4685607, 0.6883437], dtype=float32)], [array([-0.1861974 , -0.39429024, -0.02016036], dtype=float32)], [array([ 0.14360362, -0.9443668 , -1.2079065 ], dtype=float32)], [array([-2.9433894 , -2.598913  , -0.66532046], dtype=float32)], [array([ 0.46684313,  0.8547877 , -0.46408093], dtype=float32)], [array([-0.39563984, -0.2831819 , -0.18891   ], dtype=float32)], [array([-0.28008553, -0.20918302,  1.7253567 ], dtype=float32)], [array([ 2.7938051, -0.7742478,  0.705279 ], dtype=float32)]]\n",
+      "[[array([-1.1090169, 0.4685607, 0.6883437], dtype=float32)], [], [array([ 0.14360362, -0.9443668 , -1.2079065 ], dtype=float32)], [array([-2.9433894 , -2.598913  , -0.66532046], dtype=float32)], [array([ 0.46684313,  0.8547877 , -0.46408093], dtype=float32)], [array([-0.39563984, -0.2831819 , -0.18891   ], dtype=float32)], [array([-0.28008553, -0.20918302,  1.7253567 ], dtype=float32)], [array([ 2.7938051, -0.7742478,  0.705279 ], dtype=float32)]]\n",
+      "---\n",
+      "[array([-1.1090169, 0.4685607, 0.6883437], dtype=float32), array([-1.1090169, 0.4685607, 0.6883437], dtype=float32)]\n",
+      "vvv tensor([[-1.1090,  0.4686,  0.6883],\n",
+      "        [-1.1090,  0.4686,  0.6883]], device='cuda:0')\n",
+      "tensor([[-1.1090,  0.4686,  0.6883],\n",
+      "        [-1.1090,  0.4686,  0.6883]], device='cuda:0')\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 64,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "import torch\n",
+    "\n",
+    "agent_num = 8\n",
+    "ob_buffer = [[]for i in range(agent_num)]\n",
+    "obs = torch.randn(8, 3).to(\"cuda\")\n",
+    "print(obs)\n",
+    "print(np.shape(np.array(ob_buffer)))\n",
+    "print('---')\n",
+    "obs_cpu = obs.to(\"cpu\").numpy()\n",
+    "for i in range(agent_num):\n",
+    "    ob_buffer[i].append(obs_cpu[i])\n",
+    "print(ob_buffer)\n",
+    "ob_buffer[1] = []\n",
+    "print(ob_buffer)\n",
+    "print('---')\n",
+    "for i in range(agent_num):\n",
+    "    ob_buffer[i].append(obs_cpu[i])\n",
+    "print(ob_buffer[0])\n",
+    "vvv = torch.tensor(ob_buffer[0]).to(\"cuda\")\n",
+    "print(\"vvv\",vvv)\n",
+    "empt = torch.tensor([]).to(\"cuda\")\n",
+    "vvvv = torch.cat((empt,vvv),0)\n",
+    "print(vvvv)\n",
+    "vvvv.size()[0]>0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "start 0\n",
+      "end 3\n",
+      "start 3\n",
+      "end 6\n",
+      "start 6\n",
+      "end 9\n",
+      "start 9\n",
+      "end 12\n"
      ]
     }
    ],
    "source": [
-    "import torch\n",
-    "#1\n",
-    "x = torch.randn(2, 1, 1)#为1可以扩展为3和4\n",
-    "x = x.expand(2, 3, 4)\n",
-    "print('x :', x.size())\n",
-    "\n",
-    "#2\n",
-    "#扩展一个新的维度必须在最前面,否则会报错\n",
-    "#x = x.expand(2, 3, 4, 6)\n",
-    "\n",
-    "x = x.expand(6, 2, 3, 4)\n",
-    "print('x :', x.size())\n",
-    "\n",
-    "#3\n",
-    "#某一个维度为-1表示不改变该维度的大小\n",
-    "x = x.expand(6, -1, -1, -1)\n",
-    "print('x :', x.size())\n",
-    "\n",
-    "x : torch.Size([2, 3, 4])\n",
-    "x : torch.Size([6, 2, 3, 4])\n",
-    "x : torch.Size([6, 2, 3, 4])"
+    "for i in range(0,10,3):\n",
+    "    print(\"start\",i)\n",
+    "    print('end',i+3)"
    ]
   }
  ],